1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010
5 Free Software Foundation, Inc.
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23
24 #include "config.h"
25 #include "system.h"
26 #include "coretypes.h"
27 #include "tm.h"
28 #include "diagnostic-core.h"
29 #include "toplev.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
42
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT, rtx);
51 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
52 unsigned HOST_WIDE_INT, rtx);
53 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
54 unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT, rtx, int);
57 static rtx mask_rtx (enum machine_mode, int, int, int);
58 static rtx lshift_value (enum machine_mode, rtx, int, int);
59 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT, int);
61 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
62 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
63 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
64
65 /* Test whether a value is zero or a power of two. */
66 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
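/* For example: 8 & 7 == 0, so 8 qualifies; 6 & 5 == 4, so 6 does not;
   and 0 & -1 == 0, so 0 qualifies as well (the "zero" case). */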
67
68 #ifndef SLOW_UNALIGNED_ACCESS
69 #define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
70 #endif
71
72 /* For compilers that support multiple targets with different word sizes,
73 MAX_BITS_PER_WORD contains the biggest value of BITS_PER_WORD. An example
74 is the H8/300(H) compiler. */
75
76 #ifndef MAX_BITS_PER_WORD
77 #define MAX_BITS_PER_WORD BITS_PER_WORD
78 #endif
79
80 /* Reduce conditional compilation elsewhere. */
81 #ifndef HAVE_insv
82 #define HAVE_insv 0
83 #define CODE_FOR_insv CODE_FOR_nothing
84 #define gen_insv(a,b,c,d) NULL_RTX
85 #endif
86 #ifndef HAVE_extv
87 #define HAVE_extv 0
88 #define CODE_FOR_extv CODE_FOR_nothing
89 #define gen_extv(a,b,c,d) NULL_RTX
90 #endif
91 #ifndef HAVE_extzv
92 #define HAVE_extzv 0
93 #define CODE_FOR_extzv CODE_FOR_nothing
94 #define gen_extzv(a,b,c,d) NULL_RTX
95 #endif
96
97 void
98 init_expmed (void)
99 {
100 struct
101 {
102 struct rtx_def reg; rtunion reg_fld[2];
103 struct rtx_def plus; rtunion plus_fld1;
104 struct rtx_def neg;
105 struct rtx_def mult; rtunion mult_fld1;
106 struct rtx_def sdiv; rtunion sdiv_fld1;
107 struct rtx_def udiv; rtunion udiv_fld1;
108 struct rtx_def zext;
109 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
110 struct rtx_def smod_32; rtunion smod_32_fld1;
111 struct rtx_def wide_mult; rtunion wide_mult_fld1;
112 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
113 struct rtx_def wide_trunc;
114 struct rtx_def shift; rtunion shift_fld1;
115 struct rtx_def shift_mult; rtunion shift_mult_fld1;
116 struct rtx_def shift_add; rtunion shift_add_fld1;
117 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
118 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
119 } all;
120
121 rtx pow2[MAX_BITS_PER_WORD];
122 rtx cint[MAX_BITS_PER_WORD];
123 int m, n;
124 enum machine_mode mode, wider_mode;
125 int speed;
126
127
128 for (m = 1; m < MAX_BITS_PER_WORD; m++)
129 {
130 pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
131 cint[m] = GEN_INT (m);
132 }
133 memset (&all, 0, sizeof all);
134
135 PUT_CODE (&all.reg, REG);
136 /* Avoid using hard regs in ways which may be unsupported. */
137 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
138
139 PUT_CODE (&all.plus, PLUS);
140 XEXP (&all.plus, 0) = &all.reg;
141 XEXP (&all.plus, 1) = &all.reg;
142
143 PUT_CODE (&all.neg, NEG);
144 XEXP (&all.neg, 0) = &all.reg;
145
146 PUT_CODE (&all.mult, MULT);
147 XEXP (&all.mult, 0) = &all.reg;
148 XEXP (&all.mult, 1) = &all.reg;
149
150 PUT_CODE (&all.sdiv, DIV);
151 XEXP (&all.sdiv, 0) = &all.reg;
152 XEXP (&all.sdiv, 1) = &all.reg;
153
154 PUT_CODE (&all.udiv, UDIV);
155 XEXP (&all.udiv, 0) = &all.reg;
156 XEXP (&all.udiv, 1) = &all.reg;
157
158 PUT_CODE (&all.sdiv_32, DIV);
159 XEXP (&all.sdiv_32, 0) = &all.reg;
160 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? cint[32] : GEN_INT (32);
161
162 PUT_CODE (&all.smod_32, MOD);
163 XEXP (&all.smod_32, 0) = &all.reg;
164 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
165
166 PUT_CODE (&all.zext, ZERO_EXTEND);
167 XEXP (&all.zext, 0) = &all.reg;
168
169 PUT_CODE (&all.wide_mult, MULT);
170 XEXP (&all.wide_mult, 0) = &all.zext;
171 XEXP (&all.wide_mult, 1) = &all.zext;
172
173 PUT_CODE (&all.wide_lshr, LSHIFTRT);
174 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
175
176 PUT_CODE (&all.wide_trunc, TRUNCATE);
177 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
178
179 PUT_CODE (&all.shift, ASHIFT);
180 XEXP (&all.shift, 0) = &all.reg;
181
182 PUT_CODE (&all.shift_mult, MULT);
183 XEXP (&all.shift_mult, 0) = &all.reg;
184
185 PUT_CODE (&all.shift_add, PLUS);
186 XEXP (&all.shift_add, 0) = &all.shift_mult;
187 XEXP (&all.shift_add, 1) = &all.reg;
188
189 PUT_CODE (&all.shift_sub0, MINUS);
190 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
191 XEXP (&all.shift_sub0, 1) = &all.reg;
192
193 PUT_CODE (&all.shift_sub1, MINUS);
194 XEXP (&all.shift_sub1, 0) = &all.reg;
195 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
196
197 for (speed = 0; speed < 2; speed++)
198 {
199 crtl->maybe_hot_insn_p = speed;
200 zero_cost[speed] = rtx_cost (const0_rtx, SET, speed);
201
202 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT);
203 mode != VOIDmode;
204 mode = GET_MODE_WIDER_MODE (mode))
205 {
206 PUT_MODE (&all.reg, mode);
207 PUT_MODE (&all.plus, mode);
208 PUT_MODE (&all.neg, mode);
209 PUT_MODE (&all.mult, mode);
210 PUT_MODE (&all.sdiv, mode);
211 PUT_MODE (&all.udiv, mode);
212 PUT_MODE (&all.sdiv_32, mode);
213 PUT_MODE (&all.smod_32, mode);
214 PUT_MODE (&all.wide_trunc, mode);
215 PUT_MODE (&all.shift, mode);
216 PUT_MODE (&all.shift_mult, mode);
217 PUT_MODE (&all.shift_add, mode);
218 PUT_MODE (&all.shift_sub0, mode);
219 PUT_MODE (&all.shift_sub1, mode);
220
221 add_cost[speed][mode] = rtx_cost (&all.plus, SET, speed);
222 neg_cost[speed][mode] = rtx_cost (&all.neg, SET, speed);
223 mul_cost[speed][mode] = rtx_cost (&all.mult, SET, speed);
224 sdiv_cost[speed][mode] = rtx_cost (&all.sdiv, SET, speed);
225 udiv_cost[speed][mode] = rtx_cost (&all.udiv, SET, speed);
226
227 sdiv_pow2_cheap[speed][mode] = (rtx_cost (&all.sdiv_32, SET, speed)
228 <= 2 * add_cost[speed][mode]);
229 smod_pow2_cheap[speed][mode] = (rtx_cost (&all.smod_32, SET, speed)
230 <= 4 * add_cost[speed][mode]);
231
232 wider_mode = GET_MODE_WIDER_MODE (mode);
233 if (wider_mode != VOIDmode)
234 {
235 PUT_MODE (&all.zext, wider_mode);
236 PUT_MODE (&all.wide_mult, wider_mode);
237 PUT_MODE (&all.wide_lshr, wider_mode);
238 XEXP (&all.wide_lshr, 1) = GEN_INT (GET_MODE_BITSIZE (mode));
239
240 mul_widen_cost[speed][wider_mode]
241 = rtx_cost (&all.wide_mult, SET, speed);
242 mul_highpart_cost[speed][mode]
243 = rtx_cost (&all.wide_trunc, SET, speed);
244 }
245
246 shift_cost[speed][mode][0] = 0;
247 shiftadd_cost[speed][mode][0] = shiftsub0_cost[speed][mode][0]
248 = shiftsub1_cost[speed][mode][0] = add_cost[speed][mode];
249
250 n = MIN (MAX_BITS_PER_WORD, GET_MODE_BITSIZE (mode));
251 for (m = 1; m < n; m++)
252 {
253 XEXP (&all.shift, 1) = cint[m];
254 XEXP (&all.shift_mult, 1) = pow2[m];
255
256 shift_cost[speed][mode][m] = rtx_cost (&all.shift, SET, speed);
257 shiftadd_cost[speed][mode][m] = rtx_cost (&all.shift_add, SET, speed);
258 shiftsub0_cost[speed][mode][m] = rtx_cost (&all.shift_sub0, SET, speed);
259 shiftsub1_cost[speed][mode][m] = rtx_cost (&all.shift_sub1, SET, speed);
260 }
261 }
262 }
263 if (alg_hash_used_p)
264 memset (alg_hash, 0, sizeof (alg_hash));
265 else
266 alg_hash_used_p = true;
267 default_rtl_profile ();
268 }
269
270 /* Return an rtx representing minus the value of X.
271 MODE is the intended mode of the result,
272 useful if X is a CONST_INT. */
273
274 rtx
275 negate_rtx (enum machine_mode mode, rtx x)
276 {
277 rtx result = simplify_unary_operation (NEG, mode, x, mode);
278
279 if (result == 0)
280 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
281
282 return result;
283 }
284
285 /* Report on the availability of insv/extv/extzv and the desired mode
286 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
287 is false; else the mode of the specified operand. If OPNO is -1,
288 all the caller cares about is whether the insn is available. */
289 enum machine_mode
290 mode_for_extraction (enum extraction_pattern pattern, int opno)
291 {
292 const struct insn_data_d *data;
293
294 switch (pattern)
295 {
296 case EP_insv:
297 if (HAVE_insv)
298 {
299 data = &insn_data[CODE_FOR_insv];
300 break;
301 }
302 return MAX_MACHINE_MODE;
303
304 case EP_extv:
305 if (HAVE_extv)
306 {
307 data = &insn_data[CODE_FOR_extv];
308 break;
309 }
310 return MAX_MACHINE_MODE;
311
312 case EP_extzv:
313 if (HAVE_extzv)
314 {
315 data = &insn_data[CODE_FOR_extzv];
316 break;
317 }
318 return MAX_MACHINE_MODE;
319
320 default:
321 gcc_unreachable ();
322 }
323
324 if (opno == -1)
325 return VOIDmode;
326
327 /* Everyone who uses this function used to follow it with
328 if (result == VOIDmode) result = word_mode; */
329 if (data->operand[opno].mode == VOIDmode)
330 return word_mode;
331 return data->operand[opno].mode;
332 }
333
334 /* Return true if X, of mode MODE, matches the predicate for operand
335 OPNO of instruction ICODE. Allow volatile memories, regardless of
336 the ambient volatile_ok setting. */
337
338 static bool
339 check_predicate_volatile_ok (enum insn_code icode, int opno,
340 rtx x, enum machine_mode mode)
341 {
342 bool save_volatile_ok, result;
343
344 save_volatile_ok = volatile_ok;
345 result = insn_data[(int) icode].operand[opno].predicate (x, mode);
346 volatile_ok = save_volatile_ok;
347 return result;
348 }
349 \f
350 /* A subroutine of store_bit_field, with the same arguments. Return true
351 if the operation could be implemented.
352
353 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
354 no other way of implementing the operation. If FALLBACK_P is false,
355 return false instead. */
356
357 static bool
358 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
359 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
360 rtx value, bool fallback_p)
361 {
362 unsigned int unit
363 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
364 unsigned HOST_WIDE_INT offset, bitpos;
365 rtx op0 = str_rtx;
366 int byte_offset;
367 rtx orig_value;
368
369 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
370
371 while (GET_CODE (op0) == SUBREG)
372 {
373 /* The following line once was done only if WORDS_BIG_ENDIAN,
374 but I think that is a mistake. WORDS_BIG_ENDIAN is
375 meaningful at a much higher level; when structures are copied
376 between memory and regs, the higher-numbered regs
377 always get higher addresses. */
378 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
379 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
380
381 byte_offset = 0;
382
383 /* Paradoxical subregs need special handling on big endian machines. */
384 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
385 {
386 int difference = inner_mode_size - outer_mode_size;
387
388 if (WORDS_BIG_ENDIAN)
389 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
390 if (BYTES_BIG_ENDIAN)
391 byte_offset += difference % UNITS_PER_WORD;
392 }
393 else
394 byte_offset = SUBREG_BYTE (op0);
395
396 bitnum += byte_offset * BITS_PER_UNIT;
397 op0 = SUBREG_REG (op0);
398 }
399
400 /* No action is needed if the target is a register and if the field
401 lies completely outside that register. This can occur if the source
402 code contains an out-of-bounds access to a small array. */
403 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
404 return true;
405
406 /* Use vec_set patterns for inserting parts of vectors whenever
407 available. */
408 if (VECTOR_MODE_P (GET_MODE (op0))
409 && !MEM_P (op0)
410 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
411 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
412 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
413 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
414 {
415 enum machine_mode outermode = GET_MODE (op0);
416 enum machine_mode innermode = GET_MODE_INNER (outermode);
417 int icode = (int) optab_handler (vec_set_optab, outermode);
418 int pos = bitnum / GET_MODE_BITSIZE (innermode);
419 rtx rtxpos = GEN_INT (pos);
420 rtx src = value;
421 rtx dest = op0;
422 rtx pat, seq;
423 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
424 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
425 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
426
427 start_sequence ();
428
429 if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
430 src = copy_to_mode_reg (mode1, src);
431
432 if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
433 rtxpos = copy_to_mode_reg (mode2, rtxpos);
434
435 /* We could handle this, but we should always be called with a pseudo
436 for our targets and all insns should take them as outputs. */
437 gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
438 && (*insn_data[icode].operand[1].predicate) (src, mode1)
439 && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
440 pat = GEN_FCN (icode) (dest, src, rtxpos);
441 seq = get_insns ();
442 end_sequence ();
443 if (pat)
444 {
445 emit_insn (seq);
446 emit_insn (pat);
447 return true;
448 }
449 }
450
451 /* If the target is a register, then overwriting the entire object or storing
452 a full-word or multi-word field can be done with just a SUBREG.
453
454 If the target is memory, storing any naturally aligned field can be
455 done with a simple store. For targets that support fast unaligned
456 memory, any naturally sized, unit aligned field can be done directly. */
457
458 offset = bitnum / unit;
459 bitpos = bitnum % unit;
460 byte_offset = (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
461 + (offset * UNITS_PER_WORD);
462
463 if (bitpos == 0
464 && bitsize == GET_MODE_BITSIZE (fieldmode)
465 && (!MEM_P (op0)
466 ? ((GET_MODE_SIZE (fieldmode) >= UNITS_PER_WORD
467 || GET_MODE_SIZE (GET_MODE (op0)) == GET_MODE_SIZE (fieldmode))
468 && byte_offset % GET_MODE_SIZE (fieldmode) == 0)
469 : (! SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
470 || (offset * BITS_PER_UNIT % bitsize == 0
471 && MEM_ALIGN (op0) % GET_MODE_BITSIZE (fieldmode) == 0))))
472 {
473 if (MEM_P (op0))
474 op0 = adjust_address (op0, fieldmode, offset);
475 else if (GET_MODE (op0) != fieldmode)
476 op0 = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
477 byte_offset);
478 emit_move_insn (op0, value);
479 return true;
480 }
481
482 /* Make sure we are playing with integral modes. Pun with subregs
483 if we aren't. This must come after the entire register case above,
484 since that case is valid for any mode. The following cases are only
485 valid for integral modes. */
486 {
487 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
488 if (imode != GET_MODE (op0))
489 {
490 if (MEM_P (op0))
491 op0 = adjust_address (op0, imode, 0);
492 else
493 {
494 gcc_assert (imode != BLKmode);
495 op0 = gen_lowpart (imode, op0);
496 }
497 }
498 }
499
500 /* We may be accessing data outside the field, which means
501 we can alias adjacent data. */
502 if (MEM_P (op0))
503 {
504 op0 = shallow_copy_rtx (op0);
505 set_mem_alias_set (op0, 0);
506 set_mem_expr (op0, 0);
507 }
508
509 /* If OP0 is a register, BITPOS must count within a word.
510 But as we have it, it counts within whatever size OP0 now has.
511 On a bigendian machine, these are not the same, so convert. */
512 if (BYTES_BIG_ENDIAN
513 && !MEM_P (op0)
514 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
515 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
516
517 /* Storing an lsb-aligned field in a register
518 can be done with a movestrict instruction. */
519
520 if (!MEM_P (op0)
521 && (BYTES_BIG_ENDIAN ? bitpos + bitsize == unit : bitpos == 0)
522 && bitsize == GET_MODE_BITSIZE (fieldmode)
523 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
524 {
525 int icode = optab_handler (movstrict_optab, fieldmode);
526 rtx insn;
527 rtx start = get_last_insn ();
528 rtx arg0 = op0;
529
530 /* Get appropriate low part of the value being stored. */
531 if (CONST_INT_P (value) || REG_P (value))
532 value = gen_lowpart (fieldmode, value);
533 else if (!(GET_CODE (value) == SYMBOL_REF
534 || GET_CODE (value) == LABEL_REF
535 || GET_CODE (value) == CONST))
536 value = convert_to_mode (fieldmode, value, 0);
537
538 if (! (*insn_data[icode].operand[1].predicate) (value, fieldmode))
539 value = copy_to_mode_reg (fieldmode, value);
540
541 if (GET_CODE (op0) == SUBREG)
542 {
543 /* Else we've got some float mode source being extracted into
544 a different float mode destination -- this combination of
545 subregs results in Severe Tire Damage. */
546 gcc_assert (GET_MODE (SUBREG_REG (op0)) == fieldmode
547 || GET_MODE_CLASS (fieldmode) == MODE_INT
548 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
549 arg0 = SUBREG_REG (op0);
550 }
551
552 insn = (GEN_FCN (icode)
553 (gen_rtx_SUBREG (fieldmode, arg0,
554 (bitnum % BITS_PER_WORD) / BITS_PER_UNIT
555 + (offset * UNITS_PER_WORD)),
556 value));
557 if (insn)
558 {
559 emit_insn (insn);
560 return true;
561 }
562 delete_insns_since (start);
563 }
564
565 /* Handle fields bigger than a word. */
566
567 if (bitsize > BITS_PER_WORD)
568 {
569 /* Here we transfer the words of the field
570 in the order least significant first.
571 This is because the most significant word is the one which may
572 be less than full.
573 However, only do that if the value is not BLKmode. */
574
575 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
576 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
577 unsigned int i;
578 rtx last;
579
580 /* This is the mode we must force value to, so that there will be enough
581 subwords to extract. Note that fieldmode will often (always?) be
582 VOIDmode, because that is what store_field uses to indicate that this
583 is a bit field, but passing VOIDmode to operand_subword_force
584 is not allowed. */
585 fieldmode = GET_MODE (value);
586 if (fieldmode == VOIDmode)
587 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
588
589 last = get_last_insn ();
590 for (i = 0; i < nwords; i++)
591 {
592 /* If I is 0, use the low-order word in both field and target;
593 if I is 1, use the next to lowest word; and so on. */
594 unsigned int wordnum = (backwards ? nwords - i - 1 : i);
595 unsigned int bit_offset = (backwards
596 ? MAX ((int) bitsize - ((int) i + 1)
597 * BITS_PER_WORD,
598 0)
599 : (int) i * BITS_PER_WORD);
600 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
601
602 if (!store_bit_field_1 (op0, MIN (BITS_PER_WORD,
603 bitsize - i * BITS_PER_WORD),
604 bitnum + bit_offset, word_mode,
605 value_word, fallback_p))
606 {
607 delete_insns_since (last);
608 return false;
609 }
610 }
611 return true;
612 }
613
614 /* From here on we can assume that the field to be stored in fits within
615 a single word, since it is shorter than a word. */
616
617 /* OFFSET is the number of words or bytes (UNIT says which)
618 from STR_RTX to the first word or byte containing part of the field. */
619
620 if (!MEM_P (op0))
621 {
622 if (offset != 0
623 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
624 {
625 if (!REG_P (op0))
626 {
627 /* Since this is a destination (lvalue), we can't copy
628 it to a pseudo. We can remove a SUBREG that does not
629 change the size of the operand. Such a SUBREG may
630 have been added above. */
631 gcc_assert (GET_CODE (op0) == SUBREG
632 && (GET_MODE_SIZE (GET_MODE (op0))
633 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)))));
634 op0 = SUBREG_REG (op0);
635 }
636 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
637 op0, (offset * UNITS_PER_WORD));
638 }
639 offset = 0;
640 }
641
642 /* If VALUE has a floating-point or complex mode, access it as an
643 integer of the corresponding size. This can occur on a machine
644 with 64 bit registers that uses SFmode for float. It can also
645 occur for unaligned float or complex fields. */
646 orig_value = value;
647 if (GET_MODE (value) != VOIDmode
648 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
649 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
650 {
651 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
652 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
653 }
654
655 /* Now OFFSET is nonzero only if OP0 is memory
656 and is therefore always measured in bytes. */
657
658 if (HAVE_insv
659 && GET_MODE (value) != BLKmode
660 && bitsize > 0
661 && GET_MODE_BITSIZE (op_mode) >= bitsize
662 && ! ((REG_P (op0) || GET_CODE (op0) == SUBREG)
663 && (bitsize + bitpos > GET_MODE_BITSIZE (op_mode)))
664 && insn_data[CODE_FOR_insv].operand[1].predicate (GEN_INT (bitsize),
665 VOIDmode)
666 && check_predicate_volatile_ok (CODE_FOR_insv, 0, op0, VOIDmode))
667 {
668 int xbitpos = bitpos;
669 rtx value1;
670 rtx xop0 = op0;
671 rtx last = get_last_insn ();
672 rtx pat;
673 bool copy_back = false;
674
675 /* Add OFFSET into OP0's address. */
676 if (MEM_P (xop0))
677 xop0 = adjust_address (xop0, byte_mode, offset);
678
679 /* If xop0 is a register, we need it in OP_MODE
680 to make it acceptable to the format of insv. */
681 if (GET_CODE (xop0) == SUBREG)
682 /* We can't just change the mode, because this might clobber op0,
683 and we will need the original value of op0 if insv fails. */
684 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
685 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
686 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
687
688 /* If the destination is a paradoxical subreg such that we need a
689 truncate to the inner mode, perform the insertion on a temporary and
690 truncate the result to the original destination. Note that we can't
691 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
692 X) 0)) is (reg:N X). */
693 if (GET_CODE (xop0) == SUBREG
694 && REG_P (SUBREG_REG (xop0))
695 && (!TRULY_NOOP_TRUNCATION
696 (GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (xop0))),
697 GET_MODE_BITSIZE (op_mode))))
698 {
699 rtx tem = gen_reg_rtx (op_mode);
700 emit_move_insn (tem, xop0);
701 xop0 = tem;
702 copy_back = true;
703 }
704
705 /* On big-endian machines, we count bits from the most significant.
706 If the bit field insn does not, we must invert. */
707
708 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
709 xbitpos = unit - bitsize - xbitpos;
710
711 /* We have been counting XBITPOS within UNIT.
712 Count instead within the size of the register. */
713 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
714 xbitpos += GET_MODE_BITSIZE (op_mode) - unit;
715
716 unit = GET_MODE_BITSIZE (op_mode);
717
718 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
719 value1 = value;
720 if (GET_MODE (value) != op_mode)
721 {
722 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
723 {
724 /* Optimization: Don't bother really extending VALUE
725 if it has all the bits we will actually use. However,
726 if we must narrow it, be sure we do it correctly. */
727
728 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
729 {
730 rtx tmp;
731
732 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
733 if (! tmp)
734 tmp = simplify_gen_subreg (op_mode,
735 force_reg (GET_MODE (value),
736 value1),
737 GET_MODE (value), 0);
738 value1 = tmp;
739 }
740 else
741 value1 = gen_lowpart (op_mode, value1);
742 }
743 else if (CONST_INT_P (value))
744 value1 = gen_int_mode (INTVAL (value), op_mode);
745 else
746 /* Parse phase is supposed to make VALUE's data type
747 match that of the component reference, which is a type
748 at least as wide as the field; so VALUE should have
749 a mode that corresponds to that type. */
750 gcc_assert (CONSTANT_P (value));
751 }
752
753 /* If this machine's insv insists on a register,
754 get VALUE1 into a register. */
755 if (! ((*insn_data[(int) CODE_FOR_insv].operand[3].predicate)
756 (value1, op_mode)))
757 value1 = force_reg (op_mode, value1);
758
759 pat = gen_insv (xop0, GEN_INT (bitsize), GEN_INT (xbitpos), value1);
760 if (pat)
761 {
762 emit_insn (pat);
763
764 if (copy_back)
765 convert_move (op0, xop0, true);
766 return true;
767 }
768 delete_insns_since (last);
769 }
770
771 /* If OP0 is a memory, try copying it to a register and seeing if a
772 cheap register alternative is available. */
773 if (HAVE_insv && MEM_P (op0))
774 {
775 enum machine_mode bestmode;
776
777 /* Get the mode to use for inserting into this field. If OP0 is
778 BLKmode, get the smallest mode consistent with the alignment. If
779 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
780 mode. Otherwise, use the smallest mode containing the field. */
781
782 if (GET_MODE (op0) == BLKmode
783 || (op_mode != MAX_MACHINE_MODE
784 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
785 bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
786 (op_mode == MAX_MACHINE_MODE
787 ? VOIDmode : op_mode),
788 MEM_VOLATILE_P (op0));
789 else
790 bestmode = GET_MODE (op0);
791
792 if (bestmode != VOIDmode
793 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
794 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
795 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
796 {
797 rtx last, tempreg, xop0;
798 unsigned HOST_WIDE_INT xoffset, xbitpos;
799
800 last = get_last_insn ();
801
802 /* Adjust address to point to the containing unit of
803 that mode. Compute the offset as a multiple of this unit,
804 counting in bytes. */
805 unit = GET_MODE_BITSIZE (bestmode);
806 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
807 xbitpos = bitnum % unit;
808 xop0 = adjust_address (op0, bestmode, xoffset);
809
810 /* Fetch that unit, store the bitfield in it, then store
811 the unit. */
812 tempreg = copy_to_reg (xop0);
813 if (store_bit_field_1 (tempreg, bitsize, xbitpos,
814 fieldmode, orig_value, false))
815 {
816 emit_move_insn (xop0, tempreg);
817 return true;
818 }
819 delete_insns_since (last);
820 }
821 }
822
823 if (!fallback_p)
824 return false;
825
826 store_fixed_bit_field (op0, offset, bitsize, bitpos, value);
827 return true;
828 }
829
830 /* Generate code to store value from rtx VALUE
831 into a bit-field within structure STR_RTX
832 containing BITSIZE bits starting at bit BITNUM.
833 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
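/* As a purely illustrative sketch (the register and values below are
   hypothetical), a caller storing the constant 5 into a 3-bit field that
   starts at bit 2 of a pseudo register, where the field's FIELD_DECL mode
   is QImode, might do:

     rtx reg = gen_reg_rtx (SImode);
     store_bit_field (reg, 3, 2, QImode, GEN_INT (5));  */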
834
835 void
836 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
837 unsigned HOST_WIDE_INT bitnum, enum machine_mode fieldmode,
838 rtx value)
839 {
840 if (!store_bit_field_1 (str_rtx, bitsize, bitnum, fieldmode, value, true))
841 gcc_unreachable ();
842 }
843 \f
844 /* Use shifts and boolean operations to store VALUE
845 into a bit field of width BITSIZE
846 in the memory location specified by OP0, offset by OFFSET bytes.
847 (OFFSET must be 0 if OP0 is a register.)
848 The field starts at position BITPOS within the byte.
849 (If OP0 is a register, it may be a full word or a narrower mode,
850 but BITPOS still counts within a full word,
851 which is significant on bigendian machines.) */
852
853 static void
854 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT offset,
855 unsigned HOST_WIDE_INT bitsize,
856 unsigned HOST_WIDE_INT bitpos, rtx value)
857 {
858 enum machine_mode mode;
859 unsigned int total_bits = BITS_PER_WORD;
860 rtx temp;
861 int all_zero = 0;
862 int all_one = 0;
863
864 /* There is a case not handled here:
865 a structure with a known alignment of just a halfword
866 and a field split across two aligned halfwords within the structure.
867 Or likewise a structure with a known alignment of just a byte
868 and a field split across two bytes.
869 Such cases are not supposed to be able to occur. */
870
871 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
872 {
873 gcc_assert (!offset);
874 /* Special treatment for a bit field split across two registers. */
875 if (bitsize + bitpos > BITS_PER_WORD)
876 {
877 store_split_bit_field (op0, bitsize, bitpos, value);
878 return;
879 }
880 }
881 else
882 {
883 /* Get the proper mode to use for this field. We want a mode that
884 includes the entire field. If such a mode would be larger than
885 a word, we won't be doing the extraction the normal way.
886 We don't want a mode bigger than the destination. */
887
888 mode = GET_MODE (op0);
889 if (GET_MODE_BITSIZE (mode) == 0
890 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
891 mode = word_mode;
892
893 if (MEM_VOLATILE_P (op0)
894 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
895 && flag_strict_volatile_bitfields > 0)
896 mode = GET_MODE (op0);
897 else
898 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
899 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
900
901 if (mode == VOIDmode)
902 {
903 /* The only way this should occur is if the field spans word
904 boundaries. */
905 store_split_bit_field (op0, bitsize, bitpos + offset * BITS_PER_UNIT,
906 value);
907 return;
908 }
909
910 total_bits = GET_MODE_BITSIZE (mode);
911
912 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
913 be in the range 0 to total_bits-1, and put any excess bytes in
914 OFFSET. */
915 if (bitpos >= total_bits)
916 {
917 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
918 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
919 * BITS_PER_UNIT);
920 }
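/* For example, with a 32-bit MODE and BITPOS == 37, OFFSET grows by
   (37 / 32) * 4 == 4 bytes and BITPOS becomes 37 - 32 == 5. */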
921
922 /* Get ref to an aligned byte, halfword, or word containing the field.
923 Adjust BITPOS to be position within a word,
924 and OFFSET to be the offset of that word.
925 Then alter OP0 to refer to that word. */
926 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
927 offset -= (offset % (total_bits / BITS_PER_UNIT));
928 op0 = adjust_address (op0, mode, offset);
929 }
930
931 mode = GET_MODE (op0);
932
933 /* Now MODE is either some integral mode for a MEM as OP0,
934 or is a full-word for a REG as OP0. TOTAL_BITS corresponds.
935 The bit field is contained entirely within OP0.
936 BITPOS is the starting bit number within OP0.
937 (OP0's mode may actually be narrower than MODE.) */
938
939 if (BYTES_BIG_ENDIAN)
940 /* BITPOS is the distance between our msb
941 and that of the containing datum.
942 Convert it to the distance from the lsb. */
943 bitpos = total_bits - bitsize - bitpos;
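/* For example, with TOTAL_BITS == 32, BITSIZE == 8 and a big-endian
   BITPOS of 4 (the field occupies bits 4..11 counted from the msb),
   the lsb-relative position is 32 - 8 - 4 == 20, i.e. bits 20..27
   counted from the lsb. */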
944
945 /* Now BITPOS is always the distance between our lsb
946 and that of OP0. */
947
948 /* Shift VALUE left by BITPOS bits. If VALUE is not constant,
949 we must first convert its mode to MODE. */
950
951 if (CONST_INT_P (value))
952 {
953 HOST_WIDE_INT v = INTVAL (value);
954
955 if (bitsize < HOST_BITS_PER_WIDE_INT)
956 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
957
958 if (v == 0)
959 all_zero = 1;
960 else if ((bitsize < HOST_BITS_PER_WIDE_INT
961 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
962 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
963 all_one = 1;
964
965 value = lshift_value (mode, value, bitpos, bitsize);
966 }
967 else
968 {
969 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
970 && bitpos + bitsize != GET_MODE_BITSIZE (mode));
971
972 if (GET_MODE (value) != mode)
973 value = convert_to_mode (mode, value, 1);
974
975 if (must_and)
976 value = expand_binop (mode, and_optab, value,
977 mask_rtx (mode, 0, bitsize, 0),
978 NULL_RTX, 1, OPTAB_LIB_WIDEN);
979 if (bitpos > 0)
980 value = expand_shift (LSHIFT_EXPR, mode, value,
981 build_int_cst (NULL_TREE, bitpos), NULL_RTX, 1);
982 }
983
984 /* Now clear the chosen bits in OP0,
985 except that if VALUE is -1 we need not bother. */
986 /* We keep the intermediates in registers to allow CSE to combine
987 consecutive bitfield assignments. */
988
989 temp = force_reg (mode, op0);
990
991 if (! all_one)
992 {
993 temp = expand_binop (mode, and_optab, temp,
994 mask_rtx (mode, bitpos, bitsize, 1),
995 NULL_RTX, 1, OPTAB_LIB_WIDEN);
996 temp = force_reg (mode, temp);
997 }
998
999 /* Now logical-or VALUE into OP0, unless it is zero. */
1000
1001 if (! all_zero)
1002 {
1003 temp = expand_binop (mode, ior_optab, temp, value,
1004 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1005 temp = force_reg (mode, temp);
1006 }
1007
1008 if (op0 != temp)
1009 {
1010 op0 = copy_rtx (op0);
1011 emit_move_insn (op0, temp);
1012 }
1013 }
1014 \f
1015 /* Store a bit field that is split across multiple accessible memory objects.
1016
1017 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1018 BITSIZE is the field width; BITPOS the position of its first bit
1019 (within the word).
1020 VALUE is the value to store.
1021
1022 This does not yet handle fields wider than BITS_PER_WORD. */
1023
1024 static void
1025 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1026 unsigned HOST_WIDE_INT bitpos, rtx value)
1027 {
1028 unsigned int unit;
1029 unsigned int bitsdone = 0;
1030
1031 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1032 much at a time. */
1033 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1034 unit = BITS_PER_WORD;
1035 else
1036 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1037
1038 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1039 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1040 that VALUE might be a floating-point constant. */
1041 if (CONSTANT_P (value) && !CONST_INT_P (value))
1042 {
1043 rtx word = gen_lowpart_common (word_mode, value);
1044
1045 if (word && (value != word))
1046 value = word;
1047 else
1048 value = gen_lowpart_common (word_mode,
1049 force_reg (GET_MODE (value) != VOIDmode
1050 ? GET_MODE (value)
1051 : word_mode, value));
1052 }
1053
1054 while (bitsdone < bitsize)
1055 {
1056 unsigned HOST_WIDE_INT thissize;
1057 rtx part, word;
1058 unsigned HOST_WIDE_INT thispos;
1059 unsigned HOST_WIDE_INT offset;
1060
1061 offset = (bitpos + bitsdone) / unit;
1062 thispos = (bitpos + bitsdone) % unit;
1063
1064 /* THISSIZE must not overrun a word boundary. Otherwise,
1065 store_fixed_bit_field will call us again, and we will mutually
1066 recurse forever. */
1067 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1068 thissize = MIN (thissize, unit - thispos);
1069
1070 if (BYTES_BIG_ENDIAN)
1071 {
1072 int total_bits;
1073
1074 /* We must do an endian conversion exactly the same way as it is
1075 done in extract_bit_field, so that the two calls to
1076 extract_fixed_bit_field will have comparable arguments. */
1077 if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1078 total_bits = BITS_PER_WORD;
1079 else
1080 total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1081
1082 /* Fetch successively less significant portions. */
1083 if (CONST_INT_P (value))
1084 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1085 >> (bitsize - bitsdone - thissize))
1086 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1087 else
1088 /* The args are chosen so that the last part includes the
1089 lsb. Give extract_bit_field the value it needs (with
1090 endianness compensation) to fetch the piece we want. */
1091 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1092 total_bits - bitsize + bitsdone,
1093 NULL_RTX, 1);
1094 }
1095 else
1096 {
1097 /* Fetch successively more significant portions. */
1098 if (CONST_INT_P (value))
1099 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1100 >> bitsdone)
1101 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1102 else
1103 part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1104 bitsdone, NULL_RTX, 1);
1105 }
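/* For example, storing the 6-bit constant 0x2b (binary 101011) in two
   pieces of 4 and then 2 bits (the piece sizes are illustrative): the
   little-endian parts are 0x2b & 0xf == 0xb and then (0x2b >> 4) & 0x3 == 0x2;
   with BYTES_BIG_ENDIAN the most significant part comes first,
   (0x2b >> 2) & 0xf == 0xa, followed by 0x2b & 0x3 == 0x3. */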
1106
1107 /* If OP0 is a register, then handle OFFSET here.
1108
1109 When handling multiword bitfields, extract_bit_field may pass
1110 down a word_mode SUBREG of a larger REG for a bitfield that actually
1111 crosses a word boundary. Thus, for a SUBREG, we must find
1112 the current word starting from the base register. */
1113 if (GET_CODE (op0) == SUBREG)
1114 {
1115 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1116 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1117 GET_MODE (SUBREG_REG (op0)));
1118 offset = 0;
1119 }
1120 else if (REG_P (op0))
1121 {
1122 word = operand_subword_force (op0, offset, GET_MODE (op0));
1123 offset = 0;
1124 }
1125 else
1126 word = op0;
1127
1128 /* OFFSET is in UNITs, and UNIT is in bits.
1129 store_fixed_bit_field wants offset in bytes. */
1130 store_fixed_bit_field (word, offset * unit / BITS_PER_UNIT, thissize,
1131 thispos, part);
1132 bitsdone += thissize;
1133 }
1134 }
1135 \f
1136 /* A subroutine of extract_bit_field_1 that converts return value X
1137 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1138 to extract_bit_field. */
1139
1140 static rtx
1141 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1142 enum machine_mode tmode, bool unsignedp)
1143 {
1144 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1145 return x;
1146
1147 /* If TMODE is not a scalar integral mode, first convert X to an
1148 integer mode of that size and then access it as a floating-point
1149 value via a SUBREG. */
1150 if (!SCALAR_INT_MODE_P (tmode))
1151 {
1152 enum machine_mode smode;
1153
1154 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1155 x = convert_to_mode (smode, x, unsignedp);
1156 x = force_reg (smode, x);
1157 return gen_lowpart (tmode, x);
1158 }
1159
1160 return convert_to_mode (tmode, x, unsignedp);
1161 }
1162
1163 /* A subroutine of extract_bit_field, with the same arguments.
1164 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1165 if we can find no other means of implementing the operation.
1166 If FALLBACK_P is false, return NULL instead. */
1167
1168 static rtx
1169 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1170 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1171 enum machine_mode mode, enum machine_mode tmode,
1172 bool fallback_p)
1173 {
1174 unsigned int unit
1175 = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1176 unsigned HOST_WIDE_INT offset, bitpos;
1177 rtx op0 = str_rtx;
1178 enum machine_mode int_mode;
1179 enum machine_mode ext_mode;
1180 enum machine_mode mode1;
1181 enum insn_code icode;
1182 int byte_offset;
1183
1184 if (tmode == VOIDmode)
1185 tmode = mode;
1186
1187 while (GET_CODE (op0) == SUBREG)
1188 {
1189 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1190 op0 = SUBREG_REG (op0);
1191 }
1192
1193 /* If we have an out-of-bounds access to a register, just return an
1194 uninitialized register of the required mode. This can occur if the
1195 source code contains an out-of-bounds access to a small array. */
1196 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1197 return gen_reg_rtx (tmode);
1198
1199 if (REG_P (op0)
1200 && mode == GET_MODE (op0)
1201 && bitnum == 0
1202 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1203 {
1204 /* We're trying to extract a full register from itself. */
1205 return op0;
1206 }
1207
1208 /* See if we can get a better vector mode before extracting. */
1209 if (VECTOR_MODE_P (GET_MODE (op0))
1210 && !MEM_P (op0)
1211 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1212 {
1213 enum machine_mode new_mode;
1214 int nunits = GET_MODE_NUNITS (GET_MODE (op0));
1215
1216 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1217 new_mode = MIN_MODE_VECTOR_FLOAT;
1218 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1219 new_mode = MIN_MODE_VECTOR_FRACT;
1220 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1221 new_mode = MIN_MODE_VECTOR_UFRACT;
1222 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1223 new_mode = MIN_MODE_VECTOR_ACCUM;
1224 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1225 new_mode = MIN_MODE_VECTOR_UACCUM;
1226 else
1227 new_mode = MIN_MODE_VECTOR_INT;
1228
1229 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1230 if (GET_MODE_NUNITS (new_mode) == nunits
1231 && GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1232 && targetm.vector_mode_supported_p (new_mode))
1233 break;
1234 if (new_mode != VOIDmode)
1235 op0 = gen_lowpart (new_mode, op0);
1236 }
1237
1238 /* Use vec_extract patterns for extracting parts of vectors whenever
1239 available. */
1240 if (VECTOR_MODE_P (GET_MODE (op0))
1241 && !MEM_P (op0)
1242 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1243 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1244 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1245 {
1246 enum machine_mode outermode = GET_MODE (op0);
1247 enum machine_mode innermode = GET_MODE_INNER (outermode);
1248 int icode = (int) optab_handler (vec_extract_optab, outermode);
1249 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1250 rtx rtxpos = GEN_INT (pos);
1251 rtx src = op0;
1252 rtx dest = NULL, pat, seq;
1253 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
1254 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
1255 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
1256
1257 if (innermode == tmode || innermode == mode)
1258 dest = target;
1259
1260 if (!dest)
1261 dest = gen_reg_rtx (innermode);
1262
1263 start_sequence ();
1264
1265 if (! (*insn_data[icode].operand[0].predicate) (dest, mode0))
1266 dest = copy_to_mode_reg (mode0, dest);
1267
1268 if (! (*insn_data[icode].operand[1].predicate) (src, mode1))
1269 src = copy_to_mode_reg (mode1, src);
1270
1271 if (! (*insn_data[icode].operand[2].predicate) (rtxpos, mode2))
1272 rtxpos = copy_to_mode_reg (mode2, rtxpos);
1273
1274 /* We could handle this, but we should always be called with a pseudo
1275 for our targets and all insns should take them as outputs. */
1276 gcc_assert ((*insn_data[icode].operand[0].predicate) (dest, mode0)
1277 && (*insn_data[icode].operand[1].predicate) (src, mode1)
1278 && (*insn_data[icode].operand[2].predicate) (rtxpos, mode2));
1279
1280 pat = GEN_FCN (icode) (dest, src, rtxpos);
1281 seq = get_insns ();
1282 end_sequence ();
1283 if (pat)
1284 {
1285 emit_insn (seq);
1286 emit_insn (pat);
1287 if (mode0 != mode)
1288 return gen_lowpart (tmode, dest);
1289 return dest;
1290 }
1291 }
1292
1293 /* Make sure we are playing with integral modes. Pun with subregs
1294 if we aren't. */
1295 {
1296 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1297 if (imode != GET_MODE (op0))
1298 {
1299 if (MEM_P (op0))
1300 op0 = adjust_address (op0, imode, 0);
1301 else if (imode != BLKmode)
1302 {
1303 op0 = gen_lowpart (imode, op0);
1304
1305 /* If we got a SUBREG, force it into a register since we
1306 aren't going to be able to do another SUBREG on it. */
1307 if (GET_CODE (op0) == SUBREG)
1308 op0 = force_reg (imode, op0);
1309 }
1310 else if (REG_P (op0))
1311 {
1312 rtx reg, subreg;
1313 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1314 MODE_INT);
1315 reg = gen_reg_rtx (imode);
1316 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1317 emit_move_insn (subreg, op0);
1318 op0 = reg;
1319 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1320 }
1321 else
1322 {
1323 rtx mem = assign_stack_temp (GET_MODE (op0),
1324 GET_MODE_SIZE (GET_MODE (op0)), 0);
1325 emit_move_insn (mem, op0);
1326 op0 = adjust_address (mem, BLKmode, 0);
1327 }
1328 }
1329 }
1330
1331 /* We may be accessing data outside the field, which means
1332 we can alias adjacent data. */
1333 if (MEM_P (op0))
1334 {
1335 op0 = shallow_copy_rtx (op0);
1336 set_mem_alias_set (op0, 0);
1337 set_mem_expr (op0, 0);
1338 }
1339
1340 /* Extraction of a full-word or multi-word value from a structure
1341 in a register or aligned memory can be done with just a SUBREG.
1342 A subword value in the least significant part of a register
1343 can also be extracted with a SUBREG. For this, we need the
1344 byte offset of the value in op0. */
1345
1346 bitpos = bitnum % unit;
1347 offset = bitnum / unit;
1348 byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1349
1350 /* If OP0 is a register, BITPOS must count within a word.
1351 But as we have it, it counts within whatever size OP0 now has.
1352 On a bigendian machine, these are not the same, so convert. */
1353 if (BYTES_BIG_ENDIAN
1354 && !MEM_P (op0)
1355 && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1356 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1357
1358 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1359 If that's wrong, the solution is to test for it and set TARGET to 0
1360 if needed. */
1361
1362 /* Only scalar integer modes can be converted via subregs. There is an
1363 additional problem for FP modes here in that they can have a precision
1364 which is different from the size. mode_for_size uses precision, but
1365 we want a mode based on the size, so we must avoid calling it for FP
1366 modes. */
1367 mode1 = (SCALAR_INT_MODE_P (tmode)
1368 ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1369 : mode);
1370
1371 /* If the bitfield is volatile, we need to make sure the access
1372 remains on a type-aligned boundary. */
1373 if (GET_CODE (op0) == MEM
1374 && MEM_VOLATILE_P (op0)
1375 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1376 && flag_strict_volatile_bitfields > 0)
1377 goto no_subreg_mode_swap;
1378
1379 if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1380 && bitpos % BITS_PER_WORD == 0)
1381 || (mode1 != BLKmode
1382 /* ??? The big endian test here is wrong. This is correct
1383 if the value is in a register, and if mode_for_size is not
1384 the same mode as op0. This causes us to get unnecessarily
1385 inefficient code from the Thumb port when -mbig-endian. */
1386 && (BYTES_BIG_ENDIAN
1387 ? bitpos + bitsize == BITS_PER_WORD
1388 : bitpos == 0)))
1389 && ((!MEM_P (op0)
1390 && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (mode1),
1391 GET_MODE_BITSIZE (GET_MODE (op0)))
1392 && GET_MODE_SIZE (mode1) != 0
1393 && byte_offset % GET_MODE_SIZE (mode1) == 0)
1394 || (MEM_P (op0)
1395 && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1396 || (offset * BITS_PER_UNIT % bitsize == 0
1397 && MEM_ALIGN (op0) % bitsize == 0)))))
1398 {
1399 if (MEM_P (op0))
1400 op0 = adjust_address (op0, mode1, offset);
1401 else if (mode1 != GET_MODE (op0))
1402 {
1403 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1404 byte_offset);
1405 if (sub == NULL)
1406 goto no_subreg_mode_swap;
1407 op0 = sub;
1408 }
1409 if (mode1 != mode)
1410 return convert_to_mode (tmode, op0, unsignedp);
1411 return op0;
1412 }
1413 no_subreg_mode_swap:
1414
1415 /* Handle fields bigger than a word. */
1416
1417 if (bitsize > BITS_PER_WORD)
1418 {
1419 /* Here we transfer the words of the field
1420 in the order least significant first.
1421 This is because the most significant word is the one which may
1422 be less than full. */
1423
1424 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1425 unsigned int i;
1426
1427 if (target == 0 || !REG_P (target))
1428 target = gen_reg_rtx (mode);
1429
1430 /* Indicate for flow that the entire target reg is being set. */
1431 emit_clobber (target);
1432
1433 for (i = 0; i < nwords; i++)
1434 {
1435 /* If I is 0, use the low-order word in both field and target;
1436 if I is 1, use the next to lowest word; and so on. */
1437 /* Word number in TARGET to use. */
1438 unsigned int wordnum
1439 = (WORDS_BIG_ENDIAN
1440 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1441 : i);
1442 /* Offset from start of field in OP0. */
1443 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1444 ? MAX (0, ((int) bitsize - ((int) i + 1)
1445 * (int) BITS_PER_WORD))
1446 : (int) i * BITS_PER_WORD);
1447 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1448 rtx result_part
1449 = extract_bit_field (op0, MIN (BITS_PER_WORD,
1450 bitsize - i * BITS_PER_WORD),
1451 bitnum + bit_offset, 1, target_part, mode,
1452 word_mode);
1453
1454 gcc_assert (target_part);
1455
1456 if (result_part != target_part)
1457 emit_move_insn (target_part, result_part);
1458 }
1459
1460 if (unsignedp)
1461 {
1462 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1463 need to be zero'd out. */
1464 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1465 {
1466 unsigned int i, total_words;
1467
1468 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1469 for (i = nwords; i < total_words; i++)
1470 emit_move_insn
1471 (operand_subword (target,
1472 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1473 1, VOIDmode),
1474 const0_rtx);
1475 }
1476 return target;
1477 }
1478
1479 /* Signed bit field: sign-extend with two arithmetic shifts. */
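/* For example, an 8-bit field holding 0xff extracted into a 32-bit mode:
   shifting left by 32 - 8 == 24 gives 0xff000000, and the arithmetic
   right shift by 24 then yields 0xffffffff, i.e. -1. */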
1480 target = expand_shift (LSHIFT_EXPR, mode, target,
1481 build_int_cst (NULL_TREE,
1482 GET_MODE_BITSIZE (mode) - bitsize),
1483 NULL_RTX, 0);
1484 return expand_shift (RSHIFT_EXPR, mode, target,
1485 build_int_cst (NULL_TREE,
1486 GET_MODE_BITSIZE (mode) - bitsize),
1487 NULL_RTX, 0);
1488 }
1489
1490 /* From here on we know the desired field is smaller than a word. */
1491
1492 /* Check if there is a correspondingly-sized integer field, so we can
1493 safely extract it as one size of integer, if necessary; then
1494 truncate or extend to the size that is wanted; then use SUBREGs or
1495 convert_to_mode to get one of the modes we really wanted. */
1496
1497 int_mode = int_mode_for_mode (tmode);
1498 if (int_mode == BLKmode)
1499 int_mode = int_mode_for_mode (mode);
1500 /* Should probably push op0 out to memory and then do a load. */
1501 gcc_assert (int_mode != BLKmode);
1502
1503 /* OFFSET is the number of words or bytes (UNIT says which)
1504 from STR_RTX to the first word or byte containing part of the field. */
1505 if (!MEM_P (op0))
1506 {
1507 if (offset != 0
1508 || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1509 {
1510 if (!REG_P (op0))
1511 op0 = copy_to_reg (op0);
1512 op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1513 op0, (offset * UNITS_PER_WORD));
1514 }
1515 offset = 0;
1516 }
1517
1518 /* Now OFFSET is nonzero only for memory operands. */
1519 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1520 icode = unsignedp ? CODE_FOR_extzv : CODE_FOR_extv;
1521 if (ext_mode != MAX_MACHINE_MODE
1522 && bitsize > 0
1523 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1524 /* If op0 is a register, we need it in EXT_MODE to make it
1525 acceptable to the format of ext(z)v. */
1526 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1527 && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1528 && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode)))
1529 && check_predicate_volatile_ok (icode, 1, op0, GET_MODE (op0)))
1530 {
1531 unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1532 rtx bitsize_rtx, bitpos_rtx;
1533 rtx last = get_last_insn ();
1534 rtx xop0 = op0;
1535 rtx xtarget = target;
1536 rtx xspec_target = target;
1537 rtx xspec_target_subreg = 0;
1538 rtx pat;
1539
1540 /* If op0 is a register, we need it in EXT_MODE to make it
1541 acceptable to the format of ext(z)v. */
1542 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1543 xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1544 if (MEM_P (xop0))
1545 /* Get ref to first byte containing part of the field. */
1546 xop0 = adjust_address (xop0, byte_mode, xoffset);
1547
1548 /* On big-endian machines, we count bits from the most significant.
1549 If the bit field insn does not, we must invert. */
1550 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1551 xbitpos = unit - bitsize - xbitpos;
1552
1553 /* Now convert from counting within UNIT to counting in EXT_MODE. */
1554 if (BITS_BIG_ENDIAN && !MEM_P (xop0))
1555 xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1556
1557 unit = GET_MODE_BITSIZE (ext_mode);
1558
1559 if (xtarget == 0)
1560 xtarget = xspec_target = gen_reg_rtx (tmode);
1561
1562 if (GET_MODE (xtarget) != ext_mode)
1563 {
1564 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1565 between the mode of the extraction (word_mode) and the target
1566 mode. Instead, create a temporary and use convert_move to set
1567 the target. */
1568 if (REG_P (xtarget)
1569 && TRULY_NOOP_TRUNCATION (GET_MODE_BITSIZE (GET_MODE (xtarget)),
1570 GET_MODE_BITSIZE (ext_mode)))
1571 {
1572 xtarget = gen_lowpart (ext_mode, xtarget);
1573 if (GET_MODE_SIZE (ext_mode)
1574 > GET_MODE_SIZE (GET_MODE (xspec_target)))
1575 xspec_target_subreg = xtarget;
1576 }
1577 else
1578 xtarget = gen_reg_rtx (ext_mode);
1579 }
1580
1581 /* If this machine's ext(z)v insists on a register target,
1582 make sure we have one. */
1583 if (!insn_data[(int) icode].operand[0].predicate (xtarget, ext_mode))
1584 xtarget = gen_reg_rtx (ext_mode);
1585
1586 bitsize_rtx = GEN_INT (bitsize);
1587 bitpos_rtx = GEN_INT (xbitpos);
1588
1589 pat = (unsignedp
1590 ? gen_extzv (xtarget, xop0, bitsize_rtx, bitpos_rtx)
1591 : gen_extv (xtarget, xop0, bitsize_rtx, bitpos_rtx));
1592 if (pat)
1593 {
1594 emit_insn (pat);
1595 if (xtarget == xspec_target)
1596 return xtarget;
1597 if (xtarget == xspec_target_subreg)
1598 return xspec_target;
1599 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1600 }
1601 delete_insns_since (last);
1602 }
1603
1604 /* If OP0 is a memory, try copying it to a register and seeing if a
1605 cheap register alternative is available. */
1606 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1607 {
1608 enum machine_mode bestmode;
1609
1610 /* Get the mode to use for accessing this field. If
1611 OP0 is BLKmode, get the smallest mode consistent with the
1612 alignment. If OP0 is a non-BLKmode object that is no
1613 wider than EXT_MODE, use its mode. Otherwise, use the
1614 smallest mode containing the field. */
1615
1616 if (GET_MODE (op0) == BLKmode
1617 || (ext_mode != MAX_MACHINE_MODE
1618 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1619 bestmode = get_best_mode (bitsize, bitnum, MEM_ALIGN (op0),
1620 (ext_mode == MAX_MACHINE_MODE
1621 ? VOIDmode : ext_mode),
1622 MEM_VOLATILE_P (op0));
1623 else
1624 bestmode = GET_MODE (op0);
1625
1626 if (bestmode != VOIDmode
1627 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1628 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1629 {
1630 unsigned HOST_WIDE_INT xoffset, xbitpos;
1631
1632 /* Compute the offset as a multiple of this unit,
1633 counting in bytes. */
1634 unit = GET_MODE_BITSIZE (bestmode);
1635 xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1636 xbitpos = bitnum % unit;
1637
1638 /* Make sure the register is big enough for the whole field. */
1639 if (xoffset * BITS_PER_UNIT + unit
1640 >= offset * BITS_PER_UNIT + bitsize)
1641 {
1642 rtx last, result, xop0;
1643
1644 last = get_last_insn ();
1645
1646 /* Fetch it to a register in that size. */
1647 xop0 = adjust_address (op0, bestmode, xoffset);
1648 xop0 = force_reg (bestmode, xop0);
1649 result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1650 unsignedp, target,
1651 mode, tmode, false);
1652 if (result)
1653 return result;
1654
1655 delete_insns_since (last);
1656 }
1657 }
1658 }
1659
1660 if (!fallback_p)
1661 return NULL;
1662
1663 target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1664 bitpos, target, unsignedp);
1665 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1666 }
1667
1668 /* Generate code to extract a bit-field from STR_RTX
1669 containing BITSIZE bits, starting at BITNUM,
1670 and put it in TARGET if possible (if TARGET is nonzero).
1671 Regardless of TARGET, we return the rtx for where the value is placed.
1672
1673 STR_RTX is the structure containing the field (a REG or MEM).
1674 UNSIGNEDP is nonzero if this is an unsigned bit field.
1675 MODE is the natural mode of the field value once extracted.
1676 TMODE is the mode the caller would like the value to have;
1677 but the value may be returned with type MODE instead.
1678
1679 If a TARGET is specified and we can store in it at no extra cost,
1680 we do so, and return TARGET.
1681 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1682 if they are equally easy. */
1683
1684 rtx
1685 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1686 unsigned HOST_WIDE_INT bitnum, int unsignedp, rtx target,
1687 enum machine_mode mode, enum machine_mode tmode)
1688 {
1689 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1690 target, mode, tmode, true);
1691 }
1692 \f
1693 /* Extract a bit field using shifts and boolean operations;
1694 return an rtx to represent the value.
1695 OP0 addresses a register (word) or memory (byte).
1696 BITPOS says which bit within the word or byte the bit field starts in.
1697 OFFSET says how many bytes farther the bit field starts;
1698 it is 0 if OP0 is a register.
1699 BITSIZE says how many bits long the bit field is.
1700 (If OP0 is a register, it may be narrower than a full word,
1701 but BITPOS still counts within a full word,
1702 which is significant on bigendian machines.)
1703
1704 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1705 If TARGET is nonzero, attempts to store the value there
1706 and return TARGET, but this is not guaranteed.
1707 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1708
1709 static rtx
1710 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1711 unsigned HOST_WIDE_INT offset,
1712 unsigned HOST_WIDE_INT bitsize,
1713 unsigned HOST_WIDE_INT bitpos, rtx target,
1714 int unsignedp)
1715 {
1716 unsigned int total_bits = BITS_PER_WORD;
1717 enum machine_mode mode;
1718
1719 if (GET_CODE (op0) == SUBREG || REG_P (op0))
1720 {
1721 /* Special treatment for a bit field split across two registers. */
1722 if (bitsize + bitpos > BITS_PER_WORD)
1723 return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1724 }
1725 else
1726 {
1727 /* Get the proper mode to use for this field. We want a mode that
1728 includes the entire field. If such a mode would be larger than
1729 a word, we won't be doing the extraction the normal way. */
1730
1731 if (MEM_VOLATILE_P (op0)
1732 && flag_strict_volatile_bitfields > 0)
1733 {
1734 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1735 mode = GET_MODE (op0);
1736 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1737 mode = GET_MODE (target);
1738 else
1739 mode = tmode;
1740 }
1741 else
1742 mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT,
1743 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1744
1745 if (mode == VOIDmode)
1746 /* The only way this should occur is if the field spans word
1747 boundaries. */
1748 return extract_split_bit_field (op0, bitsize,
1749 bitpos + offset * BITS_PER_UNIT,
1750 unsignedp);
1751
1752 total_bits = GET_MODE_BITSIZE (mode);
1753
1754 /* Make sure bitpos is valid for the chosen mode. Adjust BITPOS to
1755 be in the range 0 to total_bits-1, and put any excess bytes in
1756 OFFSET. */
1757 if (bitpos >= total_bits)
1758 {
1759 offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1760 bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1761 * BITS_PER_UNIT);
1762 }
1763
1764 /* If we're accessing a volatile MEM, we can't do the next
1765 alignment step if it results in a multi-word access where we
1766 otherwise wouldn't have one. So, check for that case
1767 here. */
1768 if (MEM_P (op0)
1769 && MEM_VOLATILE_P (op0)
1770 && flag_strict_volatile_bitfields > 0
1771 && bitpos + bitsize <= total_bits
1772 && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1773 {
1774 if (STRICT_ALIGNMENT)
1775 {
1776 static bool informed_about_misalignment = false;
1777 bool warned;
1778
1779 if (bitsize == total_bits)
1780 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1781 "mis-aligned access used for structure member");
1782 else
1783 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1784 "mis-aligned access used for structure bitfield");
1785
1786 if (! informed_about_misalignment && warned)
1787 {
1788 informed_about_misalignment = true;
1789 inform (input_location,
1790 "When a volatile object spans multiple type-sized locations,"
1791 " the compiler must choose between using a single mis-aligned access to"
1792 " preserve the volatility, or using multiple aligned accesses to avoid"
1793 " runtime faults. This code may fail at runtime if the hardware does"
1794 " not allow this access.");
1795 }
1796 }
1797 }
1798 else
1799 {
1800
1801 /* Get ref to an aligned byte, halfword, or word containing the field.
1802 Adjust BITPOS to be position within a word,
1803 and OFFSET to be the offset of that word.
1804 Then alter OP0 to refer to that word. */
1805 bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1806 offset -= (offset % (total_bits / BITS_PER_UNIT));
1807 }
1808
1809 op0 = adjust_address (op0, mode, offset);
1810 }
1811
1812 mode = GET_MODE (op0);
1813
1814 if (BYTES_BIG_ENDIAN)
1815 /* BITPOS is the distance between our msb and that of OP0.
1816 Convert it to the distance from the lsb. */
1817 bitpos = total_bits - bitsize - bitpos;
1818
1819 /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1820 We have reduced the big-endian case to the little-endian case. */
1821
1822 if (unsignedp)
1823 {
1824 if (bitpos)
1825 {
1826 /* If the field does not already start at the lsb,
1827 shift it so it does. */
1828 tree amount = build_int_cst (NULL_TREE, bitpos);
1829 /* Maybe propagate the target for the shift. */
1830 /* But not if we will return it--could confuse integrate.c. */
1831 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1832 if (tmode != mode) subtarget = 0;
1833 op0 = expand_shift (RSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1834 }
1835 /* Convert the value to the desired mode. */
1836 if (mode != tmode)
1837 op0 = convert_to_mode (tmode, op0, 1);
1838
1839 /* Unless the msb of the field used to be the msb when we shifted,
1840 mask out the upper bits. */
1841
1842 if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1843 return expand_binop (GET_MODE (op0), and_optab, op0,
1844 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1845 target, 1, OPTAB_LIB_WIDEN);
1846 return op0;
1847 }
1848
1849 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1850 then arithmetic-shift its lsb to the lsb of the word. */
1851 op0 = force_reg (mode, op0);
1852 if (mode != tmode)
1853 target = 0;
1854
1855 /* Find the narrowest integer mode that contains the field. */
1856
1857 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1858 mode = GET_MODE_WIDER_MODE (mode))
1859 if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1860 {
1861 op0 = convert_to_mode (mode, op0, 0);
1862 break;
1863 }
1864
1865 if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1866 {
1867 tree amount
1868 = build_int_cst (NULL_TREE,
1869 GET_MODE_BITSIZE (mode) - (bitsize + bitpos));
1870 /* Maybe propagate the target for the shift. */
1871 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1872 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1873 }
1874
1875 return expand_shift (RSHIFT_EXPR, mode, op0,
1876 build_int_cst (NULL_TREE,
1877 GET_MODE_BITSIZE (mode) - bitsize),
1878 target, 0);
1879 }
1880 \f
1881 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1882 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1883 complement of that if COMPLEMENT. The mask is truncated if
1884 necessary to the width of mode MODE. The mask is zero-extended if
1885 BITSIZE+BITPOS is too small for MODE. */
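/* For example, in SImode a call with BITPOS == 4 and BITSIZE == 8 yields
   the constant 0xff0, and with COMPLEMENT nonzero it yields 0xfffff00f
   once truncated to the 32-bit mode.  */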
1886
1887 static rtx
1888 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1889 {
1890 double_int mask;
1891
1892 mask = double_int_mask (bitsize);
1893 mask = double_int_lshift (mask, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1894
1895 if (complement)
1896 mask = double_int_not (mask);
1897
1898 return immed_double_int_const (mask, mode);
1899 }
1900
1901 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1902 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
1903
1904 static rtx
1905 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1906 {
1907 double_int val;
1908
1909 val = double_int_zext (uhwi_to_double_int (INTVAL (value)), bitsize);
1910 val = double_int_lshift (val, bitpos, HOST_BITS_PER_DOUBLE_INT, false);
1911
1912 return immed_double_int_const (val, mode);
1913 }
1914 \f
1915 /* Extract a bit field that is split across two words
1916 and return an RTX for the result.
1917
1918 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1919 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1920 UNSIGNEDP is 1 if we should zero-extend the contents; else sign-extend. */
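/* For example, with BITS_PER_WORD == 32, a 20-bit field starting at bit 28
   of a multiword register is fetched as a 4-bit piece from the first word
   and a 16-bit piece from the second; the pieces are then shifted into
   place and combined with IOR.  */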
1921
1922 static rtx
1923 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1924 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1925 {
1926 unsigned int unit;
1927 unsigned int bitsdone = 0;
1928 rtx result = NULL_RTX;
1929 int first = 1;
1930
1931 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1932 much at a time. */
1933 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1934 unit = BITS_PER_WORD;
1935 else
1936 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1937
1938 while (bitsdone < bitsize)
1939 {
1940 unsigned HOST_WIDE_INT thissize;
1941 rtx part, word;
1942 unsigned HOST_WIDE_INT thispos;
1943 unsigned HOST_WIDE_INT offset;
1944
1945 offset = (bitpos + bitsdone) / unit;
1946 thispos = (bitpos + bitsdone) % unit;
1947
1948 /* THISSIZE must not overrun a word boundary. Otherwise,
1949 extract_fixed_bit_field will call us again, and we will mutually
1950 recurse forever. */
1951 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1952 thissize = MIN (thissize, unit - thispos);
1953
1954 /* If OP0 is a register, then handle OFFSET here.
1955
1956 When handling multiword bitfields, extract_bit_field may pass
1957 down a word_mode SUBREG of a larger REG for a bitfield that actually
1958 crosses a word boundary. Thus, for a SUBREG, we must find
1959 the current word starting from the base register. */
1960 if (GET_CODE (op0) == SUBREG)
1961 {
1962 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1963 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1964 GET_MODE (SUBREG_REG (op0)));
1965 offset = 0;
1966 }
1967 else if (REG_P (op0))
1968 {
1969 word = operand_subword_force (op0, offset, GET_MODE (op0));
1970 offset = 0;
1971 }
1972 else
1973 word = op0;
1974
1975 /* Extract the parts in bit-counting order,
1976 whose meaning is determined by BYTES_PER_UNIT.
1977 OFFSET is in UNITs, and UNIT is in bits.
1978 extract_fixed_bit_field wants offset in bytes. */
1979 part = extract_fixed_bit_field (word_mode, word,
1980 offset * unit / BITS_PER_UNIT,
1981 thissize, thispos, 0, 1);
1982 bitsdone += thissize;
1983
1984 /* Shift this part into place for the result. */
1985 if (BYTES_BIG_ENDIAN)
1986 {
1987 if (bitsize != bitsdone)
1988 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1989 build_int_cst (NULL_TREE, bitsize - bitsdone),
1990 0, 1);
1991 }
1992 else
1993 {
1994 if (bitsdone != thissize)
1995 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1996 build_int_cst (NULL_TREE,
1997 bitsdone - thissize), 0, 1);
1998 }
1999
2000 if (first)
2001 result = part;
2002 else
2003 /* Combine the parts with bitwise or. This works
2004 because we extracted each part as an unsigned bit field. */
2005 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2006 OPTAB_LIB_WIDEN);
2007
2008 first = 0;
2009 }
2010
2011 /* Unsigned bit field: we are done. */
2012 if (unsignedp)
2013 return result;
2014 /* Signed bit field: sign-extend with two arithmetic shifts. */
2015 result = expand_shift (LSHIFT_EXPR, word_mode, result,
2016 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2017 NULL_RTX, 0);
2018 return expand_shift (RSHIFT_EXPR, word_mode, result,
2019 build_int_cst (NULL_TREE, BITS_PER_WORD - bitsize),
2020 NULL_RTX, 0);
2021 }
2022 \f
2023 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2024 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
2025 MODE, fill the upper bits with zeros. Fail if the layout of either
2026 mode is unknown (as for CC modes) or if the extraction would involve
2027 unprofitable mode punning. Return the value on success, otherwise
2028 return null.
2029
2030 This is different from gen_lowpart* in these respects:
2031
2032 - the returned value must always be considered an rvalue
2033
2034 - when MODE is wider than SRC_MODE, the extraction involves
2035 a zero extension
2036
2037 - when MODE is smaller than SRC_MODE, the extraction involves
2038 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2039
2040 In other words, this routine performs a computation, whereas the
2041 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2042 operations. */
2043
2044 rtx
2045 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2046 {
2047 enum machine_mode int_mode, src_int_mode;
2048
2049 if (mode == src_mode)
2050 return src;
2051
2052 if (CONSTANT_P (src))
2053 {
2054 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2055 fails, it will happily create (subreg (symbol_ref)) or similar
2056 invalid SUBREGs. */
2057 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2058 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2059 if (ret)
2060 return ret;
2061
2062 if (GET_MODE (src) == VOIDmode
2063 || !validate_subreg (mode, src_mode, src, byte))
2064 return NULL_RTX;
2065
2066 src = force_reg (GET_MODE (src), src);
2067 return gen_rtx_SUBREG (mode, src, byte);
2068 }
2069
2070 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2071 return NULL_RTX;
2072
2073 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2074 && MODES_TIEABLE_P (mode, src_mode))
2075 {
2076 rtx x = gen_lowpart_common (mode, src);
2077 if (x)
2078 return x;
2079 }
2080
2081 src_int_mode = int_mode_for_mode (src_mode);
2082 int_mode = int_mode_for_mode (mode);
2083 if (src_int_mode == BLKmode || int_mode == BLKmode)
2084 return NULL_RTX;
2085
2086 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2087 return NULL_RTX;
2088 if (!MODES_TIEABLE_P (int_mode, mode))
2089 return NULL_RTX;
2090
2091 src = gen_lowpart (src_int_mode, src);
2092 src = convert_modes (int_mode, src_int_mode, src, true);
2093 src = gen_lowpart (mode, src);
2094 return src;
2095 }
2096 \f
2097 /* Add INC into TARGET. */
2098
2099 void
2100 expand_inc (rtx target, rtx inc)
2101 {
2102 rtx value = expand_binop (GET_MODE (target), add_optab,
2103 target, inc,
2104 target, 0, OPTAB_LIB_WIDEN);
2105 if (value != target)
2106 emit_move_insn (target, value);
2107 }
2108
2109 /* Subtract DEC from TARGET. */
2110
2111 void
2112 expand_dec (rtx target, rtx dec)
2113 {
2114 rtx value = expand_binop (GET_MODE (target), sub_optab,
2115 target, dec,
2116 target, 0, OPTAB_LIB_WIDEN);
2117 if (value != target)
2118 emit_move_insn (target, value);
2119 }
2120 \f
2121 /* Output a shift instruction for expression code CODE,
2122 with SHIFTED being the rtx for the value to shift,
2123 and AMOUNT the tree for the amount to shift by.
2124 Store the result in the rtx TARGET, if that is convenient.
2125 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2126 Return the rtx for where the value is. */
2127
2128 rtx
2129 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2130 tree amount, rtx target, int unsignedp)
2131 {
2132 rtx op1, temp = 0;
2133 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2134 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2135 optab lshift_optab = ashl_optab;
2136 optab rshift_arith_optab = ashr_optab;
2137 optab rshift_uns_optab = lshr_optab;
2138 optab lrotate_optab = rotl_optab;
2139 optab rrotate_optab = rotr_optab;
2140 enum machine_mode op1_mode;
2141 int attempt;
2142 bool speed = optimize_insn_for_speed_p ();
2143
2144 op1 = expand_normal (amount);
2145 op1_mode = GET_MODE (op1);
2146
2147 /* Determine whether the shift/rotate amount is a vector or a scalar. If the
2148 shift amount is a vector, use the vector/vector shift patterns. */
2149 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2150 {
2151 lshift_optab = vashl_optab;
2152 rshift_arith_optab = vashr_optab;
2153 rshift_uns_optab = vlshr_optab;
2154 lrotate_optab = vrotl_optab;
2155 rrotate_optab = vrotr_optab;
2156 }
2157
2158 /* We used to detect shift counts computed by NEGATE_EXPR and shift
2159 in the opposite direction instead, but that does not work
2160 on all machines. */
2161
2162 if (SHIFT_COUNT_TRUNCATED)
2163 {
2164 if (CONST_INT_P (op1)
2165 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2166 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2167 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2168 % GET_MODE_BITSIZE (mode));
2169 else if (GET_CODE (op1) == SUBREG
2170 && subreg_lowpart_p (op1)
2171 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2172 op1 = SUBREG_REG (op1);
2173 }
2174
2175 if (op1 == const0_rtx)
2176 return shifted;
2177
2178 /* Check whether it's cheaper to implement a left shift by a constant
2179 bit count as a sequence of additions. */
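/* For example, a left shift by 2 may be emitted as two self-additions,
   first x + x and then doubling that sum again, when two adds are
   cheaper than the shift on this target.  */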
2180 if (code == LSHIFT_EXPR
2181 && CONST_INT_P (op1)
2182 && INTVAL (op1) > 0
2183 && INTVAL (op1) < GET_MODE_BITSIZE (mode)
2184 && INTVAL (op1) < MAX_BITS_PER_WORD
2185 && shift_cost[speed][mode][INTVAL (op1)] > INTVAL (op1) * add_cost[speed][mode]
2186 && shift_cost[speed][mode][INTVAL (op1)] != MAX_COST)
2187 {
2188 int i;
2189 for (i = 0; i < INTVAL (op1); i++)
2190 {
2191 temp = force_reg (mode, shifted);
2192 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2193 unsignedp, OPTAB_LIB_WIDEN);
2194 }
2195 return shifted;
2196 }
2197
2198 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2199 {
2200 enum optab_methods methods;
2201
2202 if (attempt == 0)
2203 methods = OPTAB_DIRECT;
2204 else if (attempt == 1)
2205 methods = OPTAB_WIDEN;
2206 else
2207 methods = OPTAB_LIB_WIDEN;
2208
2209 if (rotate)
2210 {
2211 /* Widening does not work for rotation. */
2212 if (methods == OPTAB_WIDEN)
2213 continue;
2214 else if (methods == OPTAB_LIB_WIDEN)
2215 {
2216 /* If we have been unable to open-code this by a rotation,
2217 do it as the IOR of two shifts. I.e., to rotate A
2218 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2219 where C is the bitsize of A.
2220
2221 It is theoretically possible that the target machine might
2222 not be able to perform either shift and hence we would
2223 be making two libcalls rather than just the one for the
2224 shift (similarly if IOR could not be done). We will allow
2225 this extremely unlikely lossage to avoid complicating the
2226 code below. */
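/* In C terms, for a 32-bit mode and a nonzero rotate count n this
   computes
     (x << n) | ((unsigned) x >> (32 - n))
   for a left rotate, with the roles of the two shifts swapped for a
   right rotate.  */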
2227
2228 rtx subtarget = target == shifted ? 0 : target;
2229 tree new_amount, other_amount;
2230 rtx temp1;
2231 tree type = TREE_TYPE (amount);
2232 if (GET_MODE (op1) != TYPE_MODE (type)
2233 && GET_MODE (op1) != VOIDmode)
2234 op1 = convert_to_mode (TYPE_MODE (type), op1, 1);
2235 new_amount = make_tree (type, op1);
2236 other_amount
2237 = fold_build2 (MINUS_EXPR, type,
2238 build_int_cst (type, GET_MODE_BITSIZE (mode)),
2239 new_amount);
2240
2241 shifted = force_reg (mode, shifted);
2242
2243 temp = expand_shift (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2244 mode, shifted, new_amount, 0, 1);
2245 temp1 = expand_shift (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2246 mode, shifted, other_amount, subtarget, 1);
2247 return expand_binop (mode, ior_optab, temp, temp1, target,
2248 unsignedp, methods);
2249 }
2250
2251 temp = expand_binop (mode,
2252 left ? lrotate_optab : rrotate_optab,
2253 shifted, op1, target, unsignedp, methods);
2254 }
2255 else if (unsignedp)
2256 temp = expand_binop (mode,
2257 left ? lshift_optab : rshift_uns_optab,
2258 shifted, op1, target, unsignedp, methods);
2259
2260 /* Do arithmetic shifts.
2261 Also, if we are going to widen the operand, we can just as well
2262 use an arithmetic right-shift instead of a logical one. */
2263 if (temp == 0 && ! rotate
2264 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2265 {
2266 enum optab_methods methods1 = methods;
2267
2268 /* If trying to widen a log shift to an arithmetic shift,
2269 don't accept an arithmetic shift of the same size. */
2270 if (unsignedp)
2271 methods1 = OPTAB_MUST_WIDEN;
2272
2273 /* Arithmetic shift */
2274
2275 temp = expand_binop (mode,
2276 left ? lshift_optab : rshift_arith_optab,
2277 shifted, op1, target, unsignedp, methods1);
2278 }
2279
2280 /* We used to try extzv here for logical right shifts, but that was
2281 only useful for one machine, the VAX, and caused poor code
2282 generation there for lshrdi3, so the code was deleted and a
2283 define_expand for lshrsi3 was added to vax.md. */
2284 }
2285
2286 gcc_assert (temp);
2287 return temp;
2288 }
2289 \f
2290 /* Indicates the type of fixup needed after a constant multiplication.
2291 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2292 the result should be negated, and ADD_VARIANT means that the
2293 multiplicand should be added to the result. */
2294 enum mult_variant {basic_variant, negate_variant, add_variant};
2295
2296 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2297 const struct mult_cost *, enum machine_mode mode);
2298 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2299 struct algorithm *, enum mult_variant *, int);
2300 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2301 const struct algorithm *, enum mult_variant);
2302 static unsigned HOST_WIDE_INT choose_multiplier (unsigned HOST_WIDE_INT, int,
2303 int, rtx *, int *, int *);
2304 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2305 static rtx extract_high_half (enum machine_mode, rtx);
2306 static rtx expand_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2307 static rtx expand_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2308 int, int);
2309 /* Compute and return the best algorithm for multiplying by T.
2310 The algorithm must cost less than COST_LIMIT.
2311 If retval.cost >= COST_LIMIT, no algorithm was found and all
2312 other fields of the returned struct are undefined.
2313 MODE is the machine mode of the multiplication. */
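/* For example, multiplication by 10 can be synthesized as
   ((x << 2) + x) << 1, i.e. an alg_add_t2_m step for the factor 5
   followed by a final alg_shift step; whether this beats a real multiply
   depends on the target's add, shift and multiply costs.  */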
2314
2315 static void
2316 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2317 const struct mult_cost *cost_limit, enum machine_mode mode)
2318 {
2319 int m;
2320 struct algorithm *alg_in, *best_alg;
2321 struct mult_cost best_cost;
2322 struct mult_cost new_limit;
2323 int op_cost, op_latency;
2324 unsigned HOST_WIDE_INT orig_t = t;
2325 unsigned HOST_WIDE_INT q;
2326 int maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (mode));
2327 int hash_index;
2328 bool cache_hit = false;
2329 enum alg_code cache_alg = alg_zero;
2330 bool speed = optimize_insn_for_speed_p ();
2331
2332 /* Indicate that no algorithm is yet found. If no algorithm
2333 is found, this value will be returned and indicate failure. */
2334 alg_out->cost.cost = cost_limit->cost + 1;
2335 alg_out->cost.latency = cost_limit->latency + 1;
2336
2337 if (cost_limit->cost < 0
2338 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2339 return;
2340
2341 /* Restrict the bits of "t" to the multiplication's mode. */
2342 t &= GET_MODE_MASK (mode);
2343
2344 /* t == 1 can be done in zero cost. */
2345 if (t == 1)
2346 {
2347 alg_out->ops = 1;
2348 alg_out->cost.cost = 0;
2349 alg_out->cost.latency = 0;
2350 alg_out->op[0] = alg_m;
2351 return;
2352 }
2353
2354 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2355 fail now. */
2356 if (t == 0)
2357 {
2358 if (MULT_COST_LESS (cost_limit, zero_cost[speed]))
2359 return;
2360 else
2361 {
2362 alg_out->ops = 1;
2363 alg_out->cost.cost = zero_cost[speed];
2364 alg_out->cost.latency = zero_cost[speed];
2365 alg_out->op[0] = alg_zero;
2366 return;
2367 }
2368 }
2369
2370 /* We'll be needing a couple extra algorithm structures now. */
2371
2372 alg_in = XALLOCA (struct algorithm);
2373 best_alg = XALLOCA (struct algorithm);
2374 best_cost = *cost_limit;
2375
2376 /* Compute the hash index. */
2377 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2378
2379 /* See if we already know what to do for T. */
2380 if (alg_hash[hash_index].t == t
2381 && alg_hash[hash_index].mode == mode
2383 && alg_hash[hash_index].speed == speed
2384 && alg_hash[hash_index].alg != alg_unknown)
2385 {
2386 cache_alg = alg_hash[hash_index].alg;
2387
2388 if (cache_alg == alg_impossible)
2389 {
2390 /* The cache tells us that it's impossible to synthesize
2391 multiplication by T within alg_hash[hash_index].cost. */
2392 if (!CHEAPER_MULT_COST (&alg_hash[hash_index].cost, cost_limit))
2393 /* COST_LIMIT is at least as restrictive as the one
2394 recorded in the hash table, in which case we have no
2395 hope of synthesizing a multiplication. Just
2396 return. */
2397 return;
2398
2399 /* If we get here, COST_LIMIT is less restrictive than the
2400 one recorded in the hash table, so we may be able to
2401 synthesize a multiplication. Proceed as if we didn't
2402 have the cache entry. */
2403 }
2404 else
2405 {
2406 if (CHEAPER_MULT_COST (cost_limit, &alg_hash[hash_index].cost))
2407 /* The cached algorithm shows that this multiplication
2408 requires more cost than COST_LIMIT. Just return. This
2409 way, we don't clobber this cache entry with
2410 alg_impossible but retain useful information. */
2411 return;
2412
2413 cache_hit = true;
2414
2415 switch (cache_alg)
2416 {
2417 case alg_shift:
2418 goto do_alg_shift;
2419
2420 case alg_add_t_m2:
2421 case alg_sub_t_m2:
2422 goto do_alg_addsub_t_m2;
2423
2424 case alg_add_factor:
2425 case alg_sub_factor:
2426 goto do_alg_addsub_factor;
2427
2428 case alg_add_t2_m:
2429 goto do_alg_add_t2_m;
2430
2431 case alg_sub_t2_m:
2432 goto do_alg_sub_t2_m;
2433
2434 default:
2435 gcc_unreachable ();
2436 }
2437 }
2438 }
2439
2440 /* If we have a group of zero bits at the low-order part of T, try
2441 multiplying by the remaining bits and then doing a shift. */
2442
2443 if ((t & 1) == 0)
2444 {
2445 do_alg_shift:
2446 m = floor_log2 (t & -t); /* m = number of low zero bits */
2447 if (m < maxm)
2448 {
2449 q = t >> m;
2450 /* The function expand_shift will choose between a shift and
2451 a sequence of additions, so the observed cost is given as
2452 MIN (m * add_cost[speed][mode], shift_cost[speed][mode][m]). */
2453 op_cost = m * add_cost[speed][mode];
2454 if (shift_cost[speed][mode][m] < op_cost)
2455 op_cost = shift_cost[speed][mode][m];
2456 new_limit.cost = best_cost.cost - op_cost;
2457 new_limit.latency = best_cost.latency - op_cost;
2458 synth_mult (alg_in, q, &new_limit, mode);
2459
2460 alg_in->cost.cost += op_cost;
2461 alg_in->cost.latency += op_cost;
2462 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2463 {
2464 struct algorithm *x;
2465 best_cost = alg_in->cost;
2466 x = alg_in, alg_in = best_alg, best_alg = x;
2467 best_alg->log[best_alg->ops] = m;
2468 best_alg->op[best_alg->ops] = alg_shift;
2469 }
2470
2471 /* See if treating ORIG_T as a signed number yields a better
2472 sequence. Try this sequence only for a negative ORIG_T
2473 as it would be useless for a non-negative ORIG_T. */
2474 if ((HOST_WIDE_INT) orig_t < 0)
2475 {
2476 /* Shift ORIG_T as follows because a right shift of a
2477 negative-valued signed type is implementation
2478 defined. */
2479 q = ~(~orig_t >> m);
2480 /* The function expand_shift will choose between a shift
2481 and a sequence of additions, so the observed cost is
2482 given as MIN (m * add_cost[speed][mode],
2483 shift_cost[speed][mode][m]). */
2484 op_cost = m * add_cost[speed][mode];
2485 if (shift_cost[speed][mode][m] < op_cost)
2486 op_cost = shift_cost[speed][mode][m];
2487 new_limit.cost = best_cost.cost - op_cost;
2488 new_limit.latency = best_cost.latency - op_cost;
2489 synth_mult (alg_in, q, &new_limit, mode);
2490
2491 alg_in->cost.cost += op_cost;
2492 alg_in->cost.latency += op_cost;
2493 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2494 {
2495 struct algorithm *x;
2496 best_cost = alg_in->cost;
2497 x = alg_in, alg_in = best_alg, best_alg = x;
2498 best_alg->log[best_alg->ops] = m;
2499 best_alg->op[best_alg->ops] = alg_shift;
2500 }
2501 }
2502 }
2503 if (cache_hit)
2504 goto done;
2505 }
2506
2507 /* If we have an odd number, add or subtract one. */
2508 if ((t & 1) != 0)
2509 {
2510 unsigned HOST_WIDE_INT w;
2511
2512 do_alg_addsub_t_m2:
2513 for (w = 1; (w & t) != 0; w <<= 1)
2514 ;
2515 /* If T was -1, then W will be zero after the loop. This is another
2516 case where T ends with ...111. Handling this by multiplying by
2517 (T + 1) and subtracting 1 produces slightly better code and selects
2518 an algorithm much faster than treating it like the ...0111 case
2519 below. */
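/* For example, t == 15 is handled here as x * 16 - x, i.e. (x << 4) - x,
   whereas t == 3 falls through to the addition case below and becomes
   (x << 1) + x.  */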
2520 if (w == 0
2521 || (w > 2
2522 /* Reject the case where t is 3.
2523 Thus we prefer addition in that case. */
2524 && t != 3))
2525 {
2526 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
2527
2528 op_cost = add_cost[speed][mode];
2529 new_limit.cost = best_cost.cost - op_cost;
2530 new_limit.latency = best_cost.latency - op_cost;
2531 synth_mult (alg_in, t + 1, &new_limit, mode);
2532
2533 alg_in->cost.cost += op_cost;
2534 alg_in->cost.latency += op_cost;
2535 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2536 {
2537 struct algorithm *x;
2538 best_cost = alg_in->cost;
2539 x = alg_in, alg_in = best_alg, best_alg = x;
2540 best_alg->log[best_alg->ops] = 0;
2541 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2542 }
2543 }
2544 else
2545 {
2546 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
2547
2548 op_cost = add_cost[speed][mode];
2549 new_limit.cost = best_cost.cost - op_cost;
2550 new_limit.latency = best_cost.latency - op_cost;
2551 synth_mult (alg_in, t - 1, &new_limit, mode);
2552
2553 alg_in->cost.cost += op_cost;
2554 alg_in->cost.latency += op_cost;
2555 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2556 {
2557 struct algorithm *x;
2558 best_cost = alg_in->cost;
2559 x = alg_in, alg_in = best_alg, best_alg = x;
2560 best_alg->log[best_alg->ops] = 0;
2561 best_alg->op[best_alg->ops] = alg_add_t_m2;
2562 }
2563 }
2564
2565 /* We may be able to calculate a * -7, a * -15, a * -31, etc.
2566 quickly with a - a * n for some appropriate constant n. */
2567 m = exact_log2 (-orig_t + 1);
2568 if (m >= 0 && m < maxm)
2569 {
2570 op_cost = shiftsub1_cost[speed][mode][m];
2571 new_limit.cost = best_cost.cost - op_cost;
2572 new_limit.latency = best_cost.latency - op_cost;
2573 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m, &new_limit, mode);
2574
2575 alg_in->cost.cost += op_cost;
2576 alg_in->cost.latency += op_cost;
2577 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2578 {
2579 struct algorithm *x;
2580 best_cost = alg_in->cost;
2581 x = alg_in, alg_in = best_alg, best_alg = x;
2582 best_alg->log[best_alg->ops] = m;
2583 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2584 }
2585 }
2586
2587 if (cache_hit)
2588 goto done;
2589 }
2590
2591 /* Look for factors of t of the form
2592 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2593 If we find such a factor, we can multiply by t using an algorithm that
2594 multiplies by q, shifts the result by m and adds/subtracts it to/from itself.
2595
2596 We search for large factors first and loop down, even if large factors
2597 are less probable than small; if we find a large factor we will find a
2598 good sequence quickly, and therefore be able to prune (by decreasing
2599 COST_LIMIT) the search. */
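/* For example, t == 45 factors as 9 * 5, so x * 45 can be built as
   ((x * 5) << 3) + x * 5 with x * 5 == (x << 2) + x, provided the
   shift-and-add costs make this cheaper than the alternatives.  */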
2600
2601 do_alg_addsub_factor:
2602 for (m = floor_log2 (t - 1); m >= 2; m--)
2603 {
2604 unsigned HOST_WIDE_INT d;
2605
2606 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2607 if (t % d == 0 && t > d && m < maxm
2608 && (!cache_hit || cache_alg == alg_add_factor))
2609 {
2610 /* If the target has a cheap shift-and-add instruction use
2611 that in preference to a shift insn followed by an add insn.
2612 Assume that the shift-and-add is "atomic" with a latency
2613 equal to its cost, otherwise assume that on superscalar
2614 hardware the shift may be executed concurrently with the
2615 earlier steps in the algorithm. */
2616 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2617 if (shiftadd_cost[speed][mode][m] < op_cost)
2618 {
2619 op_cost = shiftadd_cost[speed][mode][m];
2620 op_latency = op_cost;
2621 }
2622 else
2623 op_latency = add_cost[speed][mode];
2624
2625 new_limit.cost = best_cost.cost - op_cost;
2626 new_limit.latency = best_cost.latency - op_latency;
2627 synth_mult (alg_in, t / d, &new_limit, mode);
2628
2629 alg_in->cost.cost += op_cost;
2630 alg_in->cost.latency += op_latency;
2631 if (alg_in->cost.latency < op_cost)
2632 alg_in->cost.latency = op_cost;
2633 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2634 {
2635 struct algorithm *x;
2636 best_cost = alg_in->cost;
2637 x = alg_in, alg_in = best_alg, best_alg = x;
2638 best_alg->log[best_alg->ops] = m;
2639 best_alg->op[best_alg->ops] = alg_add_factor;
2640 }
2641 /* Other factors will have been taken care of in the recursion. */
2642 break;
2643 }
2644
2645 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2646 if (t % d == 0 && t > d && m < maxm
2647 && (!cache_hit || cache_alg == alg_sub_factor))
2648 {
2649 /* If the target has a cheap shift-and-subtract insn use
2650 that in preference to a shift insn followed by a sub insn.
2651 Assume that the shift-and-sub is "atomic" with a latency
2652 equal to its cost, otherwise assume that on superscalar
2653 hardware the shift may be executed concurrently with the
2654 earlier steps in the algorithm. */
2655 op_cost = add_cost[speed][mode] + shift_cost[speed][mode][m];
2656 if (shiftsub0_cost[speed][mode][m] < op_cost)
2657 {
2658 op_cost = shiftsub0_cost[speed][mode][m];
2659 op_latency = op_cost;
2660 }
2661 else
2662 op_latency = add_cost[speed][mode];
2663
2664 new_limit.cost = best_cost.cost - op_cost;
2665 new_limit.latency = best_cost.latency - op_latency;
2666 synth_mult (alg_in, t / d, &new_limit, mode);
2667
2668 alg_in->cost.cost += op_cost;
2669 alg_in->cost.latency += op_latency;
2670 if (alg_in->cost.latency < op_cost)
2671 alg_in->cost.latency = op_cost;
2672 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2673 {
2674 struct algorithm *x;
2675 best_cost = alg_in->cost;
2676 x = alg_in, alg_in = best_alg, best_alg = x;
2677 best_alg->log[best_alg->ops] = m;
2678 best_alg->op[best_alg->ops] = alg_sub_factor;
2679 }
2680 break;
2681 }
2682 }
2683 if (cache_hit)
2684 goto done;
2685
2686 /* Try shift-and-add (load effective address) instructions,
2687 i.e. do a*3, a*5, a*9. */
2688 if ((t & 1) != 0)
2689 {
2690 do_alg_add_t2_m:
2691 q = t - 1;
2692 q = q & -q;
2693 m = exact_log2 (q);
2694 if (m >= 0 && m < maxm)
2695 {
2696 op_cost = shiftadd_cost[speed][mode][m];
2697 new_limit.cost = best_cost.cost - op_cost;
2698 new_limit.latency = best_cost.latency - op_cost;
2699 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2700
2701 alg_in->cost.cost += op_cost;
2702 alg_in->cost.latency += op_cost;
2703 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2704 {
2705 struct algorithm *x;
2706 best_cost = alg_in->cost;
2707 x = alg_in, alg_in = best_alg, best_alg = x;
2708 best_alg->log[best_alg->ops] = m;
2709 best_alg->op[best_alg->ops] = alg_add_t2_m;
2710 }
2711 }
2712 if (cache_hit)
2713 goto done;
2714
2715 do_alg_sub_t2_m:
2716 q = t + 1;
2717 q = q & -q;
2718 m = exact_log2 (q);
2719 if (m >= 0 && m < maxm)
2720 {
2721 op_cost = shiftsub0_cost[speed][mode][m];
2722 new_limit.cost = best_cost.cost - op_cost;
2723 new_limit.latency = best_cost.latency - op_cost;
2724 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2725
2726 alg_in->cost.cost += op_cost;
2727 alg_in->cost.latency += op_cost;
2728 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2729 {
2730 struct algorithm *x;
2731 best_cost = alg_in->cost;
2732 x = alg_in, alg_in = best_alg, best_alg = x;
2733 best_alg->log[best_alg->ops] = m;
2734 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2735 }
2736 }
2737 if (cache_hit)
2738 goto done;
2739 }
2740
2741 done:
2742 /* If best_cost has not decreased, we have not found any algorithm. */
2743 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2744 {
2745 /* We failed to find an algorithm. Record alg_impossible for
2746 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2747 we are asked to find an algorithm for T within the same or
2748 lower COST_LIMIT, we can immediately return to the
2749 caller. */
2750 alg_hash[hash_index].t = t;
2751 alg_hash[hash_index].mode = mode;
2752 alg_hash[hash_index].speed = speed;
2753 alg_hash[hash_index].alg = alg_impossible;
2754 alg_hash[hash_index].cost = *cost_limit;
2755 return;
2756 }
2757
2758 /* Cache the result. */
2759 if (!cache_hit)
2760 {
2761 alg_hash[hash_index].t = t;
2762 alg_hash[hash_index].mode = mode;
2763 alg_hash[hash_index].speed = speed;
2764 alg_hash[hash_index].alg = best_alg->op[best_alg->ops];
2765 alg_hash[hash_index].cost.cost = best_cost.cost;
2766 alg_hash[hash_index].cost.latency = best_cost.latency;
2767 }
2768
2769 /* If the sequence is too long for `struct algorithm'
2770 to record, make this search fail. */
2771 if (best_alg->ops == MAX_BITS_PER_WORD)
2772 return;
2773
2774 /* Copy the algorithm from temporary space to the space at alg_out.
2775 We avoid using structure assignment because the majority of
2776 best_alg is normally undefined, and this is a critical function. */
2777 alg_out->ops = best_alg->ops + 1;
2778 alg_out->cost = best_cost;
2779 memcpy (alg_out->op, best_alg->op,
2780 alg_out->ops * sizeof *alg_out->op);
2781 memcpy (alg_out->log, best_alg->log,
2782 alg_out->ops * sizeof *alg_out->log);
2783 }
2784 \f
2785 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2786 Try three variations:
2787
2788 - a shift/add sequence based on VAL itself
2789 - a shift/add sequence based on -VAL, followed by a negation
2790 - a shift/add sequence based on VAL - 1, followed by an addition.
2791
2792 Return true if the cheapest of these cost less than MULT_COST,
2793 describing the algorithm in *ALG and final fixup in *VARIANT. */
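/* For example, for VAL == 7 the basic variant (x << 3) - x is usually
   cheapest, while for VAL == -7 the negate variant may win by computing
   x * 7 and negating the result.  */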
2794
2795 static bool
2796 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2797 struct algorithm *alg, enum mult_variant *variant,
2798 int mult_cost)
2799 {
2800 struct algorithm alg2;
2801 struct mult_cost limit;
2802 int op_cost;
2803 bool speed = optimize_insn_for_speed_p ();
2804
2805 /* Fail quickly for impossible bounds. */
2806 if (mult_cost < 0)
2807 return false;
2808
2809 /* Ensure that mult_cost provides a reasonable upper bound.
2810 Any constant multiplication can be performed with fewer
2811 than 2 * bits additions. */
2812 op_cost = 2 * GET_MODE_BITSIZE (mode) * add_cost[speed][mode];
2813 if (mult_cost > op_cost)
2814 mult_cost = op_cost;
2815
2816 *variant = basic_variant;
2817 limit.cost = mult_cost;
2818 limit.latency = mult_cost;
2819 synth_mult (alg, val, &limit, mode);
2820
2821 /* This works only if the inverted value actually fits in an
2822 `unsigned int'. */
2823 if (HOST_BITS_PER_INT >= GET_MODE_BITSIZE (mode))
2824 {
2825 op_cost = neg_cost[speed][mode];
2826 if (MULT_COST_LESS (&alg->cost, mult_cost))
2827 {
2828 limit.cost = alg->cost.cost - op_cost;
2829 limit.latency = alg->cost.latency - op_cost;
2830 }
2831 else
2832 {
2833 limit.cost = mult_cost - op_cost;
2834 limit.latency = mult_cost - op_cost;
2835 }
2836
2837 synth_mult (&alg2, -val, &limit, mode);
2838 alg2.cost.cost += op_cost;
2839 alg2.cost.latency += op_cost;
2840 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2841 *alg = alg2, *variant = negate_variant;
2842 }
2843
2844 /* This proves very useful for division-by-constant. */
2845 op_cost = add_cost[speed][mode];
2846 if (MULT_COST_LESS (&alg->cost, mult_cost))
2847 {
2848 limit.cost = alg->cost.cost - op_cost;
2849 limit.latency = alg->cost.latency - op_cost;
2850 }
2851 else
2852 {
2853 limit.cost = mult_cost - op_cost;
2854 limit.latency = mult_cost - op_cost;
2855 }
2856
2857 synth_mult (&alg2, val - 1, &limit, mode);
2858 alg2.cost.cost += op_cost;
2859 alg2.cost.latency += op_cost;
2860 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2861 *alg = alg2, *variant = add_variant;
2862
2863 return MULT_COST_LESS (&alg->cost, mult_cost);
2864 }
2865
2866 /* A subroutine of expand_mult, used for constant multiplications.
2867 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2868 convenient. Use the shift/add sequence described by ALG and apply
2869 the final fixup specified by VARIANT. */
2870
2871 static rtx
2872 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2873 rtx target, const struct algorithm *alg,
2874 enum mult_variant variant)
2875 {
2876 HOST_WIDE_INT val_so_far;
2877 rtx insn, accum, tem;
2878 int opno;
2879 enum machine_mode nmode;
2880
2881 /* Avoid referencing memory over and over, and avoid invalid
2882 rtl sharing on SUBREGs. */
2883 op0 = force_reg (mode, op0);
2884
2885 /* ACCUM starts out either as OP0 or as a zero, depending on
2886 the first operation. */
2887
2888 if (alg->op[0] == alg_zero)
2889 {
2890 accum = copy_to_mode_reg (mode, const0_rtx);
2891 val_so_far = 0;
2892 }
2893 else if (alg->op[0] == alg_m)
2894 {
2895 accum = copy_to_mode_reg (mode, op0);
2896 val_so_far = 1;
2897 }
2898 else
2899 gcc_unreachable ();
2900
2901 for (opno = 1; opno < alg->ops; opno++)
2902 {
2903 int log = alg->log[opno];
2904 rtx shift_subtarget = optimize ? 0 : accum;
2905 rtx add_target
2906 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2907 && !optimize)
2908 ? target : 0;
2909 rtx accum_target = optimize ? 0 : accum;
2910
2911 switch (alg->op[opno])
2912 {
2913 case alg_shift:
2914 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2915 build_int_cst (NULL_TREE, log),
2916 NULL_RTX, 0);
2917 val_so_far <<= log;
2918 break;
2919
2920 case alg_add_t_m2:
2921 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2922 build_int_cst (NULL_TREE, log),
2923 NULL_RTX, 0);
2924 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2925 add_target ? add_target : accum_target);
2926 val_so_far += (HOST_WIDE_INT) 1 << log;
2927 break;
2928
2929 case alg_sub_t_m2:
2930 tem = expand_shift (LSHIFT_EXPR, mode, op0,
2931 build_int_cst (NULL_TREE, log),
2932 NULL_RTX, 0);
2933 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2934 add_target ? add_target : accum_target);
2935 val_so_far -= (HOST_WIDE_INT) 1 << log;
2936 break;
2937
2938 case alg_add_t2_m:
2939 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2940 build_int_cst (NULL_TREE, log),
2941 shift_subtarget,
2942 0);
2943 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2944 add_target ? add_target : accum_target);
2945 val_so_far = (val_so_far << log) + 1;
2946 break;
2947
2948 case alg_sub_t2_m:
2949 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2950 build_int_cst (NULL_TREE, log),
2951 shift_subtarget, 0);
2952 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2953 add_target ? add_target : accum_target);
2954 val_so_far = (val_so_far << log) - 1;
2955 break;
2956
2957 case alg_add_factor:
2958 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2959 build_int_cst (NULL_TREE, log),
2960 NULL_RTX, 0);
2961 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2962 add_target ? add_target : accum_target);
2963 val_so_far += val_so_far << log;
2964 break;
2965
2966 case alg_sub_factor:
2967 tem = expand_shift (LSHIFT_EXPR, mode, accum,
2968 build_int_cst (NULL_TREE, log),
2969 NULL_RTX, 0);
2970 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2971 (add_target
2972 ? add_target : (optimize ? 0 : tem)));
2973 val_so_far = (val_so_far << log) - val_so_far;
2974 break;
2975
2976 default:
2977 gcc_unreachable ();
2978 }
2979
2980 /* Write a REG_EQUAL note on the last insn so that we can cse
2981 multiplication sequences. Note that if ACCUM is a SUBREG,
2982 we've set the inner register and must properly indicate
2983 that. */
2984
2985 tem = op0, nmode = mode;
2986 if (GET_CODE (accum) == SUBREG)
2987 {
2988 nmode = GET_MODE (SUBREG_REG (accum));
2989 tem = gen_lowpart (nmode, op0);
2990 }
2991
2992 insn = get_last_insn ();
2993 set_unique_reg_note (insn, REG_EQUAL,
2994 gen_rtx_MULT (nmode, tem,
2995 GEN_INT (val_so_far)));
2996 }
2997
2998 if (variant == negate_variant)
2999 {
3000 val_so_far = -val_so_far;
3001 accum = expand_unop (mode, neg_optab, accum, target, 0);
3002 }
3003 else if (variant == add_variant)
3004 {
3005 val_so_far = val_so_far + 1;
3006 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3007 }
3008
3009 /* Compare only the bits of val and val_so_far that are significant
3010 in the result mode, to avoid sign-/zero-extension confusion. */
3011 val &= GET_MODE_MASK (mode);
3012 val_so_far &= GET_MODE_MASK (mode);
3013 gcc_assert (val == val_so_far);
3014
3015 return accum;
3016 }
3017
3018 /* Perform a multiplication and return an rtx for the result.
3019 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3020 TARGET is a suggestion for where to store the result (an rtx).
3021
3022 We check specially for a constant integer as OP1.
3023 If you want this check for OP0 as well, then before calling
3024 you should swap the two operands if OP0 would be constant. */
3025
3026 rtx
3027 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3028 int unsignedp)
3029 {
3030 enum mult_variant variant;
3031 struct algorithm algorithm;
3032 int max_cost;
3033 bool speed = optimize_insn_for_speed_p ();
3034
3035 /* Handling const0_rtx here allows us to use zero as a rogue value for
3036 coeff below. */
3037 if (op1 == const0_rtx)
3038 return const0_rtx;
3039 if (op1 == const1_rtx)
3040 return op0;
3041 if (op1 == constm1_rtx)
3042 return expand_unop (mode,
3043 GET_MODE_CLASS (mode) == MODE_INT
3044 && !unsignedp && flag_trapv
3045 ? negv_optab : neg_optab,
3046 op0, target, 0);
3047
3048 /* These are the operations that are potentially turned into a sequence
3049 of shifts and additions. */
3050 if (SCALAR_INT_MODE_P (mode)
3051 && (unsignedp || !flag_trapv))
3052 {
3053 HOST_WIDE_INT coeff = 0;
3054 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3055
3056 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3057 less than or equal in size to `unsigned int' this doesn't matter.
3058 If the mode is larger than `unsigned int', then synth_mult works
3059 only if the constant value exactly fits in an `unsigned int' without
3060 any truncation. This means that multiplying by negative values does
3061 not work; results are off by 2^32 on a 32-bit machine. */
3062
3063 if (CONST_INT_P (op1))
3064 {
3065 /* Attempt to handle multiplication of DImode values by negative
3066 coefficients, by performing the multiplication by a positive
3067 multiplier and then inverting the result. */
3068 if (INTVAL (op1) < 0
3069 && GET_MODE_BITSIZE (mode) > HOST_BITS_PER_WIDE_INT)
3070 {
3071 /* It's safe to use -INTVAL (op1) even for INT_MIN, as the
3072 result is interpreted as an unsigned coefficient.
3073 Exclude cost of op0 from max_cost to match the cost
3074 calculation of the synth_mult. */
3075 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed)
3076 - neg_cost[speed][mode];
3077 if (max_cost > 0
3078 && choose_mult_variant (mode, -INTVAL (op1), &algorithm,
3079 &variant, max_cost))
3080 {
3081 rtx temp = expand_mult_const (mode, op0, -INTVAL (op1),
3082 NULL_RTX, &algorithm,
3083 variant);
3084 return expand_unop (mode, neg_optab, temp, target, 0);
3085 }
3086 }
3087 else coeff = INTVAL (op1);
3088 }
3089 else if (GET_CODE (op1) == CONST_DOUBLE)
3090 {
3091 /* If we are multiplying in DImode, it may still be a win
3092 to try to work with shifts and adds. */
3093 if (CONST_DOUBLE_HIGH (op1) == 0
3094 && CONST_DOUBLE_LOW (op1) > 0)
3095 coeff = CONST_DOUBLE_LOW (op1);
3096 else if (CONST_DOUBLE_LOW (op1) == 0
3097 && EXACT_POWER_OF_2_OR_ZERO_P (CONST_DOUBLE_HIGH (op1)))
3098 {
3099 int shift = floor_log2 (CONST_DOUBLE_HIGH (op1))
3100 + HOST_BITS_PER_WIDE_INT;
3101 return expand_shift (LSHIFT_EXPR, mode, op0,
3102 build_int_cst (NULL_TREE, shift),
3103 target, unsignedp);
3104 }
3105 }
3106
3107 /* We used to test optimize here, on the grounds that it's better to
3108 produce a smaller program when -O is not used. But this causes
3109 such a terrible slowdown sometimes that it seems better to always
3110 use synth_mult. */
3111 if (coeff != 0)
3112 {
3113 /* Special case powers of two. */
3114 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3115 return expand_shift (LSHIFT_EXPR, mode, op0,
3116 build_int_cst (NULL_TREE, floor_log2 (coeff)),
3117 target, unsignedp);
3118
3119 /* Exclude cost of op0 from max_cost to match the cost
3120 calculation of the synth_mult. */
3121 max_cost = rtx_cost (gen_rtx_MULT (mode, fake_reg, op1), SET, speed);
3122 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3123 max_cost))
3124 return expand_mult_const (mode, op0, coeff, target,
3125 &algorithm, variant);
3126 }
3127 }
3128
3129 if (GET_CODE (op0) == CONST_DOUBLE)
3130 {
3131 rtx temp = op0;
3132 op0 = op1;
3133 op1 = temp;
3134 }
3135
3136 /* Expand x*2.0 as x+x. */
3137 if (GET_CODE (op1) == CONST_DOUBLE
3138 && SCALAR_FLOAT_MODE_P (mode))
3139 {
3140 REAL_VALUE_TYPE d;
3141 REAL_VALUE_FROM_CONST_DOUBLE (d, op1);
3142
3143 if (REAL_VALUES_EQUAL (d, dconst2))
3144 {
3145 op0 = force_reg (GET_MODE (op0), op0);
3146 return expand_binop (mode, add_optab, op0, op0,
3147 target, unsignedp, OPTAB_LIB_WIDEN);
3148 }
3149 }
3150
3151 /* This used to use umul_optab if unsigned, but for non-widening multiply
3152 there is no difference between signed and unsigned. */
3153 op0 = expand_binop (mode,
3154 ! unsignedp
3155 && flag_trapv && (GET_MODE_CLASS(mode) == MODE_INT)
3156 ? smulv_optab : smul_optab,
3157 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3158 gcc_assert (op0);
3159 return op0;
3160 }
3161
3162 /* Perform a widening multiplication and return an rtx for the result.
3163 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3164 TARGET is a suggestion for where to store the result (an rtx).
3165 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3166 or smul_widen_optab.
3167
3168 We check specially for a constant integer as OP1, comparing the
3169 cost of a widening multiply against the cost of a sequence of shifts
3170 and adds. */
3171
3172 rtx
3173 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3174 int unsignedp, optab this_optab)
3175 {
3176 bool speed = optimize_insn_for_speed_p ();
3177
3178 if (CONST_INT_P (op1)
3179 && (INTVAL (op1) >= 0
3180 || GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT))
3181 {
3182 HOST_WIDE_INT coeff = INTVAL (op1);
3183 int max_cost;
3184 enum mult_variant variant;
3185 struct algorithm algorithm;
3186
3187 /* Special case powers of two. */
3188 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3189 {
3190 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3191 return expand_shift (LSHIFT_EXPR, mode, op0,
3192 build_int_cst (NULL_TREE, floor_log2 (coeff)),
3193 target, unsignedp);
3194 }
3195
3196 /* Exclude cost of op0 from max_cost to match the cost
3197 calculation of the synth_mult. */
3198 max_cost = mul_widen_cost[speed][mode];
3199 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3200 max_cost))
3201 {
3202 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3203 return expand_mult_const (mode, op0, coeff, target,
3204 &algorithm, variant);
3205 }
3206 }
3207 return expand_binop (mode, this_optab, op0, op1, target,
3208 unsignedp, OPTAB_LIB_WIDEN);
3209 }
3210 \f
3211 /* Return the smallest n such that 2**n >= X. */
3212
3213 int
3214 ceil_log2 (unsigned HOST_WIDE_INT x)
3215 {
3216 return floor_log2 (x - 1) + 1;
3217 }
3218
3219 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3220 replace division by D, and put the least significant N bits of the result
3221 in *MULTIPLIER_PTR and return the most significant bit.
3222
3223 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3224 needed precision is in PRECISION (should be <= N).
3225
3226 PRECISION should be as small as possible so this function can choose
3227 multiplier more freely.
3228
3229 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3230 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3231
3232 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3233 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
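/* For example, for D == 5 with N == PRECISION == 32 this chooses the
   multiplier 0xcccccccd with a post shift of 2 and returns 0, so
   x / 5 == (x * 0xcccccccd) >> 34 for every 32-bit unsigned x.  */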
3234
3235 static
3236 unsigned HOST_WIDE_INT
3237 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3238 rtx *multiplier_ptr, int *post_shift_ptr, int *lgup_ptr)
3239 {
3240 HOST_WIDE_INT mhigh_hi, mlow_hi;
3241 unsigned HOST_WIDE_INT mhigh_lo, mlow_lo;
3242 int lgup, post_shift;
3243 int pow, pow2;
3244 unsigned HOST_WIDE_INT nl, dummy1;
3245 HOST_WIDE_INT nh, dummy2;
3246
3247 /* lgup = ceil(log2(divisor)); */
3248 lgup = ceil_log2 (d);
3249
3250 gcc_assert (lgup <= n);
3251
3252 pow = n + lgup;
3253 pow2 = n + lgup - precision;
3254
3255 /* We could handle this with some effort, but this case is much
3256 better handled directly with a scc insn, so rely on caller using
3257 that. */
3258 gcc_assert (pow != 2 * HOST_BITS_PER_WIDE_INT);
3259
3260 /* mlow = 2^(N + lgup)/d */
3261 if (pow >= HOST_BITS_PER_WIDE_INT)
3262 {
3263 nh = (HOST_WIDE_INT) 1 << (pow - HOST_BITS_PER_WIDE_INT);
3264 nl = 0;
3265 }
3266 else
3267 {
3268 nh = 0;
3269 nl = (unsigned HOST_WIDE_INT) 1 << pow;
3270 }
3271 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3272 &mlow_lo, &mlow_hi, &dummy1, &dummy2);
3273
3274 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3275 if (pow2 >= HOST_BITS_PER_WIDE_INT)
3276 nh |= (HOST_WIDE_INT) 1 << (pow2 - HOST_BITS_PER_WIDE_INT);
3277 else
3278 nl |= (unsigned HOST_WIDE_INT) 1 << pow2;
3279 div_and_round_double (TRUNC_DIV_EXPR, 1, nl, nh, d, (HOST_WIDE_INT) 0,
3280 &mhigh_lo, &mhigh_hi, &dummy1, &dummy2);
3281
3282 gcc_assert (!mhigh_hi || nh - d < d);
3283 gcc_assert (mhigh_hi <= 1 && mlow_hi <= 1);
3284 /* Assert that mlow < mhigh. */
3285 gcc_assert (mlow_hi < mhigh_hi
3286 || (mlow_hi == mhigh_hi && mlow_lo < mhigh_lo));
3287
3288 /* If precision == N, then mlow, mhigh exceed 2^N
3289 (but they do not exceed 2^(N+1)). */
3290
3291 /* Reduce to lowest terms. */
3292 for (post_shift = lgup; post_shift > 0; post_shift--)
3293 {
3294 unsigned HOST_WIDE_INT ml_lo = (mlow_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mlow_lo >> 1);
3295 unsigned HOST_WIDE_INT mh_lo = (mhigh_hi << (HOST_BITS_PER_WIDE_INT - 1)) | (mhigh_lo >> 1);
3296 if (ml_lo >= mh_lo)
3297 break;
3298
3299 mlow_hi = 0;
3300 mlow_lo = ml_lo;
3301 mhigh_hi = 0;
3302 mhigh_lo = mh_lo;
3303 }
3304
3305 *post_shift_ptr = post_shift;
3306 *lgup_ptr = lgup;
3307 if (n < HOST_BITS_PER_WIDE_INT)
3308 {
3309 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3310 *multiplier_ptr = GEN_INT (mhigh_lo & mask);
3311 return mhigh_lo >= mask;
3312 }
3313 else
3314 {
3315 *multiplier_ptr = GEN_INT (mhigh_lo);
3316 return mhigh_hi;
3317 }
3318 }
3319
3320 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3321 congruent to 1 (mod 2**N). */
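/* For example, invert_mod2n (3, 8) returns 171, and indeed
   3 * 171 == 513 == 2 * 256 + 1, i.e. 1 (mod 2**8).  */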
3322
3323 static unsigned HOST_WIDE_INT
3324 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3325 {
3326 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3327
3328 /* The algorithm notes that the choice y = x satisfies
3329 x*y == 1 mod 2^3, since x is assumed odd.
3330 Each iteration doubles the number of bits of significance in y. */
3331
3332 unsigned HOST_WIDE_INT mask;
3333 unsigned HOST_WIDE_INT y = x;
3334 int nbit = 3;
3335
3336 mask = (n == HOST_BITS_PER_WIDE_INT
3337 ? ~(unsigned HOST_WIDE_INT) 0
3338 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3339
3340 while (nbit < n)
3341 {
3342 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3343 nbit *= 2;
3344 }
3345 return y;
3346 }
3347
3348 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3349 of the wrong signedness. ADJ_OPERAND is already the high half of the
3350 product OP0 x OP1. If UNSIGNEDP is nonzero, adjust the signed product
3351 to become unsigned, if UNSIGNEDP is zero, adjust the unsigned product to
3352 become signed.
3353
3354 The result is put in TARGET if that is convenient.
3355
3356 MODE is the mode of operation. */
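/* The adjustment uses the identity, valid modulo 2**N for N-bit operands,
     high_unsigned (x, y) == high_signed (x, y)
                             + (x < 0 ? y : 0) + (y < 0 ? x : 0);
   each correction term is formed below by an arithmetic right shift of
   N - 1 bits followed by an AND.  */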
3357
3358 rtx
3359 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3360 rtx op1, rtx target, int unsignedp)
3361 {
3362 rtx tem;
3363 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3364
3365 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3366 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3367 NULL_RTX, 0);
3368 tem = expand_and (mode, tem, op1, NULL_RTX);
3369 adj_operand
3370 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3371 adj_operand);
3372
3373 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3374 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode) - 1),
3375 NULL_RTX, 0);
3376 tem = expand_and (mode, tem, op0, NULL_RTX);
3377 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3378 target);
3379
3380 return target;
3381 }
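
/* Illustrative sketch (not part of GCC): the adjustment above for the
   UNSIGNEDP == 0 direction, i.e. recovering the signed high half of a
   32x32 product from the unsigned high half.  (op0 >> 31) & op1 is OP1
   where OP0 is negative and 0 otherwise, and likewise for the other
   operand; subtracting both terms modulo 2^32 gives the signed high
   half.  Names are hypothetical; assumes the <stdint.h> types.  */
#if 0
static int32_t
example_signed_high_from_unsigned (int32_t op0, int32_t op1,
				   uint32_t unsigned_high)
{
  uint32_t adj = unsigned_high;

  adj -= (op0 < 0) ? (uint32_t) op1 : 0;	/* (op0 >> 31) & op1 */
  adj -= (op1 < 0) ? (uint32_t) op0 : 0;	/* (op1 >> 31) & op0 */
  return (int32_t) adj;
}
#endif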
3382
3383 /* Subroutine of expand_mult_highpart. Return the MODE high part of OP. */
3384
3385 static rtx
3386 extract_high_half (enum machine_mode mode, rtx op)
3387 {
3388 enum machine_mode wider_mode;
3389
3390 if (mode == word_mode)
3391 return gen_highpart (mode, op);
3392
3393 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3394
3395 wider_mode = GET_MODE_WIDER_MODE (mode);
3396 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3397 build_int_cst (NULL_TREE, GET_MODE_BITSIZE (mode)), 0, 1);
3398 return convert_modes (mode, wider_mode, op, 0);
3399 }
3400
3401 /* Like expand_mult_highpart, but only consider using a multiplication
3402 optab. OP1 is an rtx for the constant operand. */
3403
3404 static rtx
3405 expand_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3406 rtx target, int unsignedp, int max_cost)
3407 {
3408 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3409 enum machine_mode wider_mode;
3410 optab moptab;
3411 rtx tem;
3412 int size;
3413 bool speed = optimize_insn_for_speed_p ();
3414
3415 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3416
3417 wider_mode = GET_MODE_WIDER_MODE (mode);
3418 size = GET_MODE_BITSIZE (mode);
3419
3420 /* Firstly, try using a multiplication insn that only generates the needed
3421 high part of the product, and in the sign flavor of unsignedp. */
3422 if (mul_highpart_cost[speed][mode] < max_cost)
3423 {
3424 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3425 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3426 unsignedp, OPTAB_DIRECT);
3427 if (tem)
3428 return tem;
3429 }
3430
3431 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3432 Need to adjust the result after the multiplication. */
3433 if (size - 1 < BITS_PER_WORD
3434 && (mul_highpart_cost[speed][mode] + 2 * shift_cost[speed][mode][size-1]
3435 + 4 * add_cost[speed][mode] < max_cost))
3436 {
3437 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3438 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3439 unsignedp, OPTAB_DIRECT);
3440 if (tem)
3441 /* We used the wrong signedness. Adjust the result. */
3442 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3443 tem, unsignedp);
3444 }
3445
3446 /* Try widening multiplication. */
3447 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3448 if (optab_handler (moptab, wider_mode) != CODE_FOR_nothing
3449 && mul_widen_cost[speed][wider_mode] < max_cost)
3450 {
3451 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3452 unsignedp, OPTAB_WIDEN);
3453 if (tem)
3454 return extract_high_half (mode, tem);
3455 }
3456
3457 /* Try widening the mode and performing a non-widening multiplication. */
3458 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3459 && size - 1 < BITS_PER_WORD
3460 && mul_cost[speed][wider_mode] + shift_cost[speed][mode][size-1] < max_cost)
3461 {
3462 rtx insns, wop0, wop1;
3463
3464 /* We need to widen the operands, for example to ensure the
3465 constant multiplier is correctly sign or zero extended.
3466 Use a sequence to clean up any instructions emitted by
3467 the conversions if things don't work out. */
3468 start_sequence ();
3469 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3470 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3471 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3472 unsignedp, OPTAB_WIDEN);
3473 insns = get_insns ();
3474 end_sequence ();
3475
3476 if (tem)
3477 {
3478 emit_insn (insns);
3479 return extract_high_half (mode, tem);
3480 }
3481 }
3482
3483 /* Try widening multiplication of opposite signedness, and adjust. */
3484 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3485 if (optab_handler (moptab, wider_mode) != CODE_FOR_nothing
3486 && size - 1 < BITS_PER_WORD
3487 && (mul_widen_cost[speed][wider_mode] + 2 * shift_cost[speed][mode][size-1]
3488 + 4 * add_cost[speed][mode] < max_cost))
3489 {
3490 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3491 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3492 if (tem != 0)
3493 {
3494 tem = extract_high_half (mode, tem);
3495 /* We used the wrong signedness. Adjust the result. */
3496 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3497 target, unsignedp);
3498 }
3499 }
3500
3501 return 0;
3502 }
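
/* Illustrative sketch (not part of GCC): the widening-multiplication
   fallback above, written in portable C for a 32-bit mode with a 64-bit
   wider mode: widen both operands, multiply in the wider mode, and keep
   only the high half.  The name is hypothetical; assumes the <stdint.h>
   types.  */
#if 0
static uint32_t
example_umul_highpart_via_widening (uint32_t a, uint32_t b)
{
  return (uint32_t) (((uint64_t) a * (uint64_t) b) >> 32);
}
#endif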
3503
3504 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3505 putting the high half of the result in TARGET if that is convenient,
3506 and return where the result is. If the operation cannot be performed,
3507 0 is returned.
3508
3509 MODE is the mode of operation and result.
3510
3511 UNSIGNEDP nonzero means unsigned multiply.
3512
3513 MAX_COST is the total allowed cost for the expanded RTL. */
3514
3515 static rtx
3516 expand_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3517 rtx target, int unsignedp, int max_cost)
3518 {
3519 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3520 unsigned HOST_WIDE_INT cnst1;
3521 int extra_cost;
3522 bool sign_adjust = false;
3523 enum mult_variant variant;
3524 struct algorithm alg;
3525 rtx tem;
3526 bool speed = optimize_insn_for_speed_p ();
3527
3528 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3529 /* We can't support modes wider than HOST_BITS_PER_WIDE_INT. */
3530 gcc_assert (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT);
3531
3532 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3533
3534 /* We can't optimize modes wider than BITS_PER_WORD.
3535 ??? We might be able to perform double-word arithmetic if
3536 mode == word_mode, however all the cost calculations in
3537 synth_mult etc. assume single-word operations. */
3538 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3539 return expand_mult_highpart_optab (mode, op0, op1, target,
3540 unsignedp, max_cost);
3541
3542 extra_cost = shift_cost[speed][mode][GET_MODE_BITSIZE (mode) - 1];
3543
3544 /* Check whether we try to multiply by a negative constant. */
3545 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3546 {
3547 sign_adjust = true;
3548 extra_cost += add_cost[speed][mode];
3549 }
3550
3551 /* See whether shift/add multiplication is cheap enough. */
3552 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3553 max_cost - extra_cost))
3554 {
3555 /* See whether the specialized multiplication optabs are
3556 cheaper than the shift/add version. */
3557 tem = expand_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3558 alg.cost.cost + extra_cost);
3559 if (tem)
3560 return tem;
3561
3562 tem = convert_to_mode (wider_mode, op0, unsignedp);
3563 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3564 tem = extract_high_half (mode, tem);
3565
3566 /* Adjust result for signedness. */
3567 if (sign_adjust)
3568 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3569
3570 return tem;
3571 }
3572 return expand_mult_highpart_optab (mode, op0, op1, target,
3573 unsignedp, max_cost);
3574 }
3575
3576
3577 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3578
3579 static rtx
3580 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3581 {
3582 unsigned HOST_WIDE_INT masklow, maskhigh;
3583 rtx result, temp, shift, label;
3584 int logd;
3585
3586 logd = floor_log2 (d);
3587 result = gen_reg_rtx (mode);
3588
3589 /* Avoid conditional branches when they're expensive. */
3590 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3591 && optimize_insn_for_speed_p ())
3592 {
3593 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3594 mode, 0, -1);
3595 if (signmask)
3596 {
3597 signmask = force_reg (mode, signmask);
3598 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3599 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3600
3601 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3602 which instruction sequence to use. If logical right shifts
3603 are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3604 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3605
3606 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3607 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3608 || rtx_cost (temp, SET, optimize_insn_for_speed_p ()) > COSTS_N_INSNS (2))
3609 {
3610 temp = expand_binop (mode, xor_optab, op0, signmask,
3611 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3612 temp = expand_binop (mode, sub_optab, temp, signmask,
3613 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3614 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3615 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3616 temp = expand_binop (mode, xor_optab, temp, signmask,
3617 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3618 temp = expand_binop (mode, sub_optab, temp, signmask,
3619 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3620 }
3621 else
3622 {
3623 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3624 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3625 signmask = force_reg (mode, signmask);
3626
3627 temp = expand_binop (mode, add_optab, op0, signmask,
3628 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3629 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3630 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3631 temp = expand_binop (mode, sub_optab, temp, signmask,
3632 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3633 }
3634 return temp;
3635 }
3636 }
3637
3638 /* Mask contains the mode's signbit and the significant bits of the
3639 modulus. By including the signbit in the operation, many targets
3640 can avoid an explicit compare operation in the following comparison
3641 against zero. */
3642
3643 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3644 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3645 {
3646 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3647 maskhigh = -1;
3648 }
3649 else
3650 maskhigh = (HOST_WIDE_INT) -1
3651 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3652
3653 temp = expand_binop (mode, and_optab, op0,
3654 immed_double_const (masklow, maskhigh, mode),
3655 result, 1, OPTAB_LIB_WIDEN);
3656 if (temp != result)
3657 emit_move_insn (result, temp);
3658
3659 label = gen_label_rtx ();
3660 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3661
3662 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3663 0, OPTAB_LIB_WIDEN);
3664 masklow = (HOST_WIDE_INT) -1 << logd;
3665 maskhigh = -1;
3666 temp = expand_binop (mode, ior_optab, temp,
3667 immed_double_const (masklow, maskhigh, mode),
3668 result, 1, OPTAB_LIB_WIDEN);
3669 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3670 0, OPTAB_LIB_WIDEN);
3671 if (temp != result)
3672 emit_move_insn (result, temp);
3673 emit_label (label);
3674 return result;
3675 }
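
/* Illustrative sketch (not part of GCC): the branch-free sequence used
   above when logical right shifts are cheap, written out for a 32-bit
   signed X and divisor D = 1 << LOGD.  The real code obtains the
   all-ones/zero mask with emit_store_flag; here a plain conditional
   stands in for it.  Assumes the <stdint.h> types and the usual two's
   complement wraparound of unsigned arithmetic.  */
#if 0
static int32_t
example_smod_pow2 (int32_t x, int logd)
{
  uint32_t mask = ((uint32_t) 1 << logd) - 1;
  /* All-ones when X is negative, then keep only the low LOGD bits.  */
  uint32_t bias = (x < 0 ? ~(uint32_t) 0 : 0) >> (32 - logd);

  /* Bias, mask, unbias: the result is the truncated remainder X % D.  */
  return (int32_t) ((((uint32_t) x + bias) & mask) - bias);
}
#endif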
3676
3677 /* Expand signed division of OP0 by a power of two D in mode MODE.
3678 This routine is only called for positive values of D. */
3679
3680 static rtx
3681 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3682 {
3683 rtx temp, label;
3684 tree shift;
3685 int logd;
3686
3687 logd = floor_log2 (d);
3688 shift = build_int_cst (NULL_TREE, logd);
3689
3690 if (d == 2
3691 && BRANCH_COST (optimize_insn_for_speed_p (),
3692 false) >= 1)
3693 {
3694 temp = gen_reg_rtx (mode);
3695 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3696 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3697 0, OPTAB_LIB_WIDEN);
3698 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3699 }
3700
3701 #ifdef HAVE_conditional_move
3702 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3703 >= 2)
3704 {
3705 rtx temp2;
3706
3707 /* ??? emit_conditional_move forces a stack adjustment via
3708 compare_from_rtx so, if the sequence is discarded, it will
3709 be lost. Do it now instead. */
3710 do_pending_stack_adjust ();
3711
3712 start_sequence ();
3713 temp2 = copy_to_mode_reg (mode, op0);
3714 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3715 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3716 temp = force_reg (mode, temp);
3717
3718 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3719 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3720 mode, temp, temp2, mode, 0);
3721 if (temp2)
3722 {
3723 rtx seq = get_insns ();
3724 end_sequence ();
3725 emit_insn (seq);
3726 return expand_shift (RSHIFT_EXPR, mode, temp2, shift, NULL_RTX, 0);
3727 }
3728 end_sequence ();
3729 }
3730 #endif
3731
3732 if (BRANCH_COST (optimize_insn_for_speed_p (),
3733 false) >= 2)
3734 {
3735 int ushift = GET_MODE_BITSIZE (mode) - logd;
3736
3737 temp = gen_reg_rtx (mode);
3738 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3739 if (shift_cost[optimize_insn_for_speed_p ()][mode][ushift] > COSTS_N_INSNS (1))
3740 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3741 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3742 else
3743 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3744 build_int_cst (NULL_TREE, ushift),
3745 NULL_RTX, 1);
3746 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3747 0, OPTAB_LIB_WIDEN);
3748 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3749 }
3750
3751 label = gen_label_rtx ();
3752 temp = copy_to_mode_reg (mode, op0);
3753 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3754 expand_inc (temp, GEN_INT (d - 1));
3755 emit_label (label);
3756 return expand_shift (RSHIFT_EXPR, mode, temp, shift, NULL_RTX, 0);
3757 }
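
/* Illustrative sketch (not part of GCC): the branch-free shape of the
   code emitted above for a 32-bit signed dividend X and divisor
   D = 1 << LOGD.  Negative dividends are biased by D - 1 so that the
   arithmetic right shift truncates towards zero rather than rounding
   towards minus infinity.  Assumes >> on a negative int is an arithmetic
   shift and the <stdint.h> types; the name is hypothetical.  */
#if 0
static int32_t
example_sdiv_pow2 (int32_t x, int logd)
{
  int32_t bias = (x < 0) ? (((int32_t) 1 << logd) - 1) : 0;

  return (x + bias) >> logd;
}
#endif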
3758 \f
3759 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3760 if that is convenient, and returning where the result is.
3761 You may request either the quotient or the remainder as the result;
3762 specify REM_FLAG nonzero to get the remainder.
3763
3764 CODE is the expression code for which kind of division this is;
3765 it controls how rounding is done. MODE is the machine mode to use.
3766 UNSIGNEDP nonzero means do unsigned division. */
3767
3768 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3769 and then correct it by or'ing in missing high bits
3770 if result of ANDI is nonzero.
3771 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3772 This could optimize to a bfexts instruction.
3773 But C doesn't use these operations, so their optimizations are
3774 left for later. */
3775 /* ??? For modulo, we don't actually need the highpart of the first product,
3776 the low part will do nicely. And for small divisors, the second multiply
3777 can also be a low-part only multiply or even be completely left out.
3778 E.g. to calculate the remainder of a division by 3 with a 32 bit
3779 multiply, multiply with 0x55555556 and extract the upper two bits;
3780 the result is exact for inputs up to 0x1fffffff.
3781 The input range can be reduced by using cross-sum rules.
3782 For odd divisors >= 3, the following table gives right shift counts
3783 so that if a number is shifted by an integer multiple of the given
3784 amount, the remainder stays the same:
3785 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3786 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3787 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3788 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3789 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3790
3791 Cross-sum rules for even numbers can be derived by leaving as many bits
3792 to the right alone as the divisor has zeros to the right.
3793 E.g. if x is an unsigned 32 bit number:
3794 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3795 */
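
/* Illustrative sketch (not part of GCC) of the remainder-by-3 trick
   mentioned above: 0x55555556 is (2^32 + 2) / 3, so after a single
   32-bit multiply the top two bits of the low half of the product hold
   the remainder.  As stated above, the result is exact for inputs up to
   0x1fffffff.  The name is hypothetical; assumes the <stdint.h> types.  */
#if 0
static uint32_t
example_rem3 (uint32_t x)
{
  /* Requires x <= 0x1fffffff for an exact result.  */
  return (x * (uint32_t) 0x55555556) >> 30;
}
#endif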
3796
3797 rtx
3798 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3799 rtx op0, rtx op1, rtx target, int unsignedp)
3800 {
3801 enum machine_mode compute_mode;
3802 rtx tquotient;
3803 rtx quotient = 0, remainder = 0;
3804 rtx last;
3805 int size;
3806 rtx insn, set;
3807 optab optab1, optab2;
3808 int op1_is_constant, op1_is_pow2 = 0;
3809 int max_cost, extra_cost;
3810 static HOST_WIDE_INT last_div_const = 0;
3811 static HOST_WIDE_INT ext_op1;
3812 bool speed = optimize_insn_for_speed_p ();
3813
3814 op1_is_constant = CONST_INT_P (op1);
3815 if (op1_is_constant)
3816 {
3817 ext_op1 = INTVAL (op1);
3818 if (unsignedp)
3819 ext_op1 &= GET_MODE_MASK (mode);
3820 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3821 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3822 }
3823
3824 /*
3825 This is the structure of expand_divmod:
3826
3827 First comes code to fix up the operands so we can perform the operations
3828 correctly and efficiently.
3829
3830 Second comes a switch statement with code specific for each rounding mode.
3831 For some special operands this code emits all RTL for the desired
3832 operation, for other cases, it generates only a quotient and stores it in
3833 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3834 to indicate that it has not done anything.
3835
3836 Last comes code that finishes the operation. If QUOTIENT is set and
3837 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3838 QUOTIENT is not set, it is computed using trunc rounding.
3839
3840 We try to generate special code for division and remainder when OP1 is a
3841 constant. If |OP1| = 2**n we can use shifts and some other fast
3842 operations. For other values of OP1, we compute a carefully selected
3843 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3844 by m.
3845
3846 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3847 half of the product. Different strategies for generating the product are
3848 implemented in expand_mult_highpart.
3849
3850 If what we actually want is the remainder, we generate that by another
3851 by-constant multiplication and a subtraction. */
3852
3853 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3854 code below will malfunction if we are, so check here and handle
3855 the special case if so. */
3856 if (op1 == const1_rtx)
3857 return rem_flag ? const0_rtx : op0;
3858
3859 /* When dividing by -1, we could get an overflow.
3860 negv_optab can handle overflows. */
3861 if (! unsignedp && op1 == constm1_rtx)
3862 {
3863 if (rem_flag)
3864 return const0_rtx;
3865 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3866 ? negv_optab : neg_optab, op0, target, 0);
3867 }
3868
3869 if (target
3870 /* Don't use the function value register as a target
3871 since we have to read it as well as write it,
3872 and function-inlining gets confused by this. */
3873 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3874 /* Don't clobber an operand while doing a multi-step calculation. */
3875 || ((rem_flag || op1_is_constant)
3876 && (reg_mentioned_p (target, op0)
3877 || (MEM_P (op0) && MEM_P (target))))
3878 || reg_mentioned_p (target, op1)
3879 || (MEM_P (op1) && MEM_P (target))))
3880 target = 0;
3881
3882 /* Get the mode in which to perform this computation. Normally it will
3883 be MODE, but sometimes we can't do the desired operation in MODE.
3884 If so, pick a wider mode in which we can do the operation. Convert
3885 to that mode at the start to avoid repeated conversions.
3886
3887 First see what operations we need. These depend on the expression
3888 we are evaluating. (We assume that divxx3 insns exist under the
3889 same conditions that modxx3 insns do, and that these insns don't normally
3890 fail. If these assumptions are not correct, we may generate less
3891 efficient code in some cases.)
3892
3893 Then see if we find a mode in which we can open-code that operation
3894 (either a division, modulus, or shift). Finally, check for the smallest
3895 mode for which we can do the operation with a library call. */
3896
3897 /* We might want to refine this now that we have division-by-constant
3898 optimization. Since expand_mult_highpart tries so many variants, it is
3899 not straightforward to generalize this. Maybe we should make an array
3900 of possible modes in init_expmed? Save this for GCC 2.7. */
3901
3902 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3903 ? (unsignedp ? lshr_optab : ashr_optab)
3904 : (unsignedp ? udiv_optab : sdiv_optab));
3905 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3906 ? optab1
3907 : (unsignedp ? udivmod_optab : sdivmod_optab));
3908
3909 for (compute_mode = mode; compute_mode != VOIDmode;
3910 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3911 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3912 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3913 break;
3914
3915 if (compute_mode == VOIDmode)
3916 for (compute_mode = mode; compute_mode != VOIDmode;
3917 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3918 if (optab_libfunc (optab1, compute_mode)
3919 || optab_libfunc (optab2, compute_mode))
3920 break;
3921
3922 /* If we still couldn't find a mode, use MODE, but expand_binop will
3923 probably die. */
3924 if (compute_mode == VOIDmode)
3925 compute_mode = mode;
3926
3927 if (target && GET_MODE (target) == compute_mode)
3928 tquotient = target;
3929 else
3930 tquotient = gen_reg_rtx (compute_mode);
3931
3932 size = GET_MODE_BITSIZE (compute_mode);
3933 #if 0
3934 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3935 (mode), and thereby get better code when OP1 is a constant. Do that
3936 later. It will require going over all usages of SIZE below. */
3937 size = GET_MODE_BITSIZE (mode);
3938 #endif
3939
3940 /* Only deduct something for a REM if the last divide done was
3941 for a different constant. Then set the constant of the last
3942 divide. */
3943 max_cost = unsignedp ? udiv_cost[speed][compute_mode] : sdiv_cost[speed][compute_mode];
3944 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3945 && INTVAL (op1) == last_div_const))
3946 max_cost -= mul_cost[speed][compute_mode] + add_cost[speed][compute_mode];
3947
3948 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3949
3950 /* Now convert to the best mode to use. */
3951 if (compute_mode != mode)
3952 {
3953 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3954 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3955
3956 /* convert_modes may have placed op1 into a register, so we
3957 must recompute the following. */
3958 op1_is_constant = CONST_INT_P (op1);
3959 op1_is_pow2 = (op1_is_constant
3960 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3961 || (! unsignedp
3962 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3963 }
3964
3965 /* If one of the operands is a volatile MEM, copy it into a register. */
3966
3967 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3968 op0 = force_reg (compute_mode, op0);
3969 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3970 op1 = force_reg (compute_mode, op1);
3971
3972 /* If we need the remainder or if OP1 is constant, we need to
3973 put OP0 in a register in case it has any queued subexpressions. */
3974 if (rem_flag || op1_is_constant)
3975 op0 = force_reg (compute_mode, op0);
3976
3977 last = get_last_insn ();
3978
3979 /* Promote floor rounding to trunc rounding for unsigned operations. */
3980 if (unsignedp)
3981 {
3982 if (code == FLOOR_DIV_EXPR)
3983 code = TRUNC_DIV_EXPR;
3984 if (code == FLOOR_MOD_EXPR)
3985 code = TRUNC_MOD_EXPR;
3986 if (code == EXACT_DIV_EXPR && op1_is_pow2)
3987 code = TRUNC_DIV_EXPR;
3988 }
3989
3990 if (op1 != const0_rtx)
3991 switch (code)
3992 {
3993 case TRUNC_MOD_EXPR:
3994 case TRUNC_DIV_EXPR:
3995 if (op1_is_constant)
3996 {
3997 if (unsignedp)
3998 {
3999 unsigned HOST_WIDE_INT mh;
4000 int pre_shift, post_shift;
4001 int dummy;
4002 rtx ml;
4003 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4004 & GET_MODE_MASK (compute_mode));
4005
4006 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4007 {
4008 pre_shift = floor_log2 (d);
4009 if (rem_flag)
4010 {
4011 remainder
4012 = expand_binop (compute_mode, and_optab, op0,
4013 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4014 remainder, 1,
4015 OPTAB_LIB_WIDEN);
4016 if (remainder)
4017 return gen_lowpart (mode, remainder);
4018 }
4019 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4020 build_int_cst (NULL_TREE,
4021 pre_shift),
4022 tquotient, 1);
4023 }
4024 else if (size <= HOST_BITS_PER_WIDE_INT)
4025 {
4026 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4027 {
4028 /* Most significant bit of divisor is set; emit an scc
4029 insn. */
4030 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4031 compute_mode, 1, 1);
4032 }
4033 else
4034 {
4035 /* Find a suitable multiplier and right shift count
4036 instead of multiplying with D. */
4037
4038 mh = choose_multiplier (d, size, size,
4039 &ml, &post_shift, &dummy);
4040
4041 /* If the suggested multiplier is more than SIZE bits,
4042 we can do better for even divisors, using an
4043 initial right shift. */
4044 if (mh != 0 && (d & 1) == 0)
4045 {
4046 pre_shift = floor_log2 (d & -d);
4047 mh = choose_multiplier (d >> pre_shift, size,
4048 size - pre_shift,
4049 &ml, &post_shift, &dummy);
4050 gcc_assert (!mh);
4051 }
4052 else
4053 pre_shift = 0;
4054
4055 if (mh != 0)
4056 {
4057 rtx t1, t2, t3, t4;
4058
4059 if (post_shift - 1 >= BITS_PER_WORD)
4060 goto fail1;
4061
4062 extra_cost
4063 = (shift_cost[speed][compute_mode][post_shift - 1]
4064 + shift_cost[speed][compute_mode][1]
4065 + 2 * add_cost[speed][compute_mode]);
4066 t1 = expand_mult_highpart (compute_mode, op0, ml,
4067 NULL_RTX, 1,
4068 max_cost - extra_cost);
4069 if (t1 == 0)
4070 goto fail1;
4071 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4072 op0, t1),
4073 NULL_RTX);
4074 t3 = expand_shift
4075 (RSHIFT_EXPR, compute_mode, t2,
4076 build_int_cst (NULL_TREE, 1),
4077 NULL_RTX,1);
4078 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4079 t1, t3),
4080 NULL_RTX);
4081 quotient = expand_shift
4082 (RSHIFT_EXPR, compute_mode, t4,
4083 build_int_cst (NULL_TREE, post_shift - 1),
4084 tquotient, 1);
4085 }
4086 else
4087 {
4088 rtx t1, t2;
4089
4090 if (pre_shift >= BITS_PER_WORD
4091 || post_shift >= BITS_PER_WORD)
4092 goto fail1;
4093
4094 t1 = expand_shift
4095 (RSHIFT_EXPR, compute_mode, op0,
4096 build_int_cst (NULL_TREE, pre_shift),
4097 NULL_RTX, 1);
4098 extra_cost
4099 = (shift_cost[speed][compute_mode][pre_shift]
4100 + shift_cost[speed][compute_mode][post_shift]);
4101 t2 = expand_mult_highpart (compute_mode, t1, ml,
4102 NULL_RTX, 1,
4103 max_cost - extra_cost);
4104 if (t2 == 0)
4105 goto fail1;
4106 quotient = expand_shift
4107 (RSHIFT_EXPR, compute_mode, t2,
4108 build_int_cst (NULL_TREE, post_shift),
4109 tquotient, 1);
4110 }
4111 }
4112 }
4113 else /* Too wide mode to use tricky code */
4114 break;
4115
4116 insn = get_last_insn ();
4117 if (insn != last
4118 && (set = single_set (insn)) != 0
4119 && SET_DEST (set) == quotient)
4120 set_unique_reg_note (insn,
4121 REG_EQUAL,
4122 gen_rtx_UDIV (compute_mode, op0, op1));
4123 }
4124 else /* TRUNC_DIV, signed */
4125 {
4126 unsigned HOST_WIDE_INT ml;
4127 int lgup, post_shift;
4128 rtx mlr;
4129 HOST_WIDE_INT d = INTVAL (op1);
4130 unsigned HOST_WIDE_INT abs_d;
4131
4132 /* Since d might be INT_MIN, we have to cast to
4133 unsigned HOST_WIDE_INT before negating to avoid
4134 undefined signed overflow. */
4135 abs_d = (d >= 0
4136 ? (unsigned HOST_WIDE_INT) d
4137 : - (unsigned HOST_WIDE_INT) d);
4138
4139 /* n rem d = n rem -d */
4140 if (rem_flag && d < 0)
4141 {
4142 d = abs_d;
4143 op1 = gen_int_mode (abs_d, compute_mode);
4144 }
4145
4146 if (d == 1)
4147 quotient = op0;
4148 else if (d == -1)
4149 quotient = expand_unop (compute_mode, neg_optab, op0,
4150 tquotient, 0);
4151 else if (HOST_BITS_PER_WIDE_INT >= size
4152 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4153 {
4154 /* This case is not handled correctly below. */
4155 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4156 compute_mode, 1, 1);
4157 if (quotient == 0)
4158 goto fail1;
4159 }
4160 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4161 && (rem_flag ? smod_pow2_cheap[speed][compute_mode]
4162 : sdiv_pow2_cheap[speed][compute_mode])
4163 /* We assume that the cheap metric is true if the
4164 optab has an expander for this mode. */
4165 && ((optab_handler ((rem_flag ? smod_optab
4166 : sdiv_optab),
4167 compute_mode)
4168 != CODE_FOR_nothing)
4169 || (optab_handler (sdivmod_optab,
4170 compute_mode)
4171 != CODE_FOR_nothing)))
4172 ;
4173 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4174 {
4175 if (rem_flag)
4176 {
4177 remainder = expand_smod_pow2 (compute_mode, op0, d);
4178 if (remainder)
4179 return gen_lowpart (mode, remainder);
4180 }
4181
4182 if (sdiv_pow2_cheap[speed][compute_mode]
4183 && ((optab_handler (sdiv_optab, compute_mode)
4184 != CODE_FOR_nothing)
4185 || (optab_handler (sdivmod_optab, compute_mode)
4186 != CODE_FOR_nothing)))
4187 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4188 compute_mode, op0,
4189 gen_int_mode (abs_d,
4190 compute_mode),
4191 NULL_RTX, 0);
4192 else
4193 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4194
4195 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4196 negate the quotient. */
4197 if (d < 0)
4198 {
4199 insn = get_last_insn ();
4200 if (insn != last
4201 && (set = single_set (insn)) != 0
4202 && SET_DEST (set) == quotient
4203 && abs_d < ((unsigned HOST_WIDE_INT) 1
4204 << (HOST_BITS_PER_WIDE_INT - 1)))
4205 set_unique_reg_note (insn,
4206 REG_EQUAL,
4207 gen_rtx_DIV (compute_mode,
4208 op0,
4209 GEN_INT
4210 (trunc_int_for_mode
4211 (abs_d,
4212 compute_mode))));
4213
4214 quotient = expand_unop (compute_mode, neg_optab,
4215 quotient, quotient, 0);
4216 }
4217 }
4218 else if (size <= HOST_BITS_PER_WIDE_INT)
4219 {
4220 choose_multiplier (abs_d, size, size - 1,
4221 &mlr, &post_shift, &lgup);
4222 ml = (unsigned HOST_WIDE_INT) INTVAL (mlr);
4223 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4224 {
4225 rtx t1, t2, t3;
4226
4227 if (post_shift >= BITS_PER_WORD
4228 || size - 1 >= BITS_PER_WORD)
4229 goto fail1;
4230
4231 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4232 + shift_cost[speed][compute_mode][size - 1]
4233 + add_cost[speed][compute_mode]);
4234 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4235 NULL_RTX, 0,
4236 max_cost - extra_cost);
4237 if (t1 == 0)
4238 goto fail1;
4239 t2 = expand_shift
4240 (RSHIFT_EXPR, compute_mode, t1,
4241 build_int_cst (NULL_TREE, post_shift),
4242 NULL_RTX, 0);
4243 t3 = expand_shift
4244 (RSHIFT_EXPR, compute_mode, op0,
4245 build_int_cst (NULL_TREE, size - 1),
4246 NULL_RTX, 0);
4247 if (d < 0)
4248 quotient
4249 = force_operand (gen_rtx_MINUS (compute_mode,
4250 t3, t2),
4251 tquotient);
4252 else
4253 quotient
4254 = force_operand (gen_rtx_MINUS (compute_mode,
4255 t2, t3),
4256 tquotient);
4257 }
4258 else
4259 {
4260 rtx t1, t2, t3, t4;
4261
4262 if (post_shift >= BITS_PER_WORD
4263 || size - 1 >= BITS_PER_WORD)
4264 goto fail1;
4265
4266 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4267 mlr = gen_int_mode (ml, compute_mode);
4268 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4269 + shift_cost[speed][compute_mode][size - 1]
4270 + 2 * add_cost[speed][compute_mode]);
4271 t1 = expand_mult_highpart (compute_mode, op0, mlr,
4272 NULL_RTX, 0,
4273 max_cost - extra_cost);
4274 if (t1 == 0)
4275 goto fail1;
4276 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4277 t1, op0),
4278 NULL_RTX);
4279 t3 = expand_shift
4280 (RSHIFT_EXPR, compute_mode, t2,
4281 build_int_cst (NULL_TREE, post_shift),
4282 NULL_RTX, 0);
4283 t4 = expand_shift
4284 (RSHIFT_EXPR, compute_mode, op0,
4285 build_int_cst (NULL_TREE, size - 1),
4286 NULL_RTX, 0);
4287 if (d < 0)
4288 quotient
4289 = force_operand (gen_rtx_MINUS (compute_mode,
4290 t4, t3),
4291 tquotient);
4292 else
4293 quotient
4294 = force_operand (gen_rtx_MINUS (compute_mode,
4295 t3, t4),
4296 tquotient);
4297 }
4298 }
4299 else /* Too wide mode to use tricky code */
4300 break;
4301
4302 insn = get_last_insn ();
4303 if (insn != last
4304 && (set = single_set (insn)) != 0
4305 && SET_DEST (set) == quotient)
4306 set_unique_reg_note (insn,
4307 REG_EQUAL,
4308 gen_rtx_DIV (compute_mode, op0, op1));
4309 }
4310 break;
4311 }
4312 fail1:
4313 delete_insns_since (last);
4314 break;
4315
4316 case FLOOR_DIV_EXPR:
4317 case FLOOR_MOD_EXPR:
4318 /* We will come here only for signed operations. */
4319 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4320 {
4321 unsigned HOST_WIDE_INT mh;
4322 int pre_shift, lgup, post_shift;
4323 HOST_WIDE_INT d = INTVAL (op1);
4324 rtx ml;
4325
4326 if (d > 0)
4327 {
4328 /* We could just as easily deal with negative constants here,
4329 but it does not seem worth the trouble for GCC 2.6. */
4330 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4331 {
4332 pre_shift = floor_log2 (d);
4333 if (rem_flag)
4334 {
4335 remainder = expand_binop (compute_mode, and_optab, op0,
4336 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4337 remainder, 0, OPTAB_LIB_WIDEN);
4338 if (remainder)
4339 return gen_lowpart (mode, remainder);
4340 }
4341 quotient = expand_shift
4342 (RSHIFT_EXPR, compute_mode, op0,
4343 build_int_cst (NULL_TREE, pre_shift),
4344 tquotient, 0);
4345 }
4346 else
4347 {
4348 rtx t1, t2, t3, t4;
4349
4350 mh = choose_multiplier (d, size, size - 1,
4351 &ml, &post_shift, &lgup);
4352 gcc_assert (!mh);
4353
4354 if (post_shift < BITS_PER_WORD
4355 && size - 1 < BITS_PER_WORD)
4356 {
4357 t1 = expand_shift
4358 (RSHIFT_EXPR, compute_mode, op0,
4359 build_int_cst (NULL_TREE, size - 1),
4360 NULL_RTX, 0);
4361 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4362 NULL_RTX, 0, OPTAB_WIDEN);
4363 extra_cost = (shift_cost[speed][compute_mode][post_shift]
4364 + shift_cost[speed][compute_mode][size - 1]
4365 + 2 * add_cost[speed][compute_mode]);
4366 t3 = expand_mult_highpart (compute_mode, t2, ml,
4367 NULL_RTX, 1,
4368 max_cost - extra_cost);
4369 if (t3 != 0)
4370 {
4371 t4 = expand_shift
4372 (RSHIFT_EXPR, compute_mode, t3,
4373 build_int_cst (NULL_TREE, post_shift),
4374 NULL_RTX, 1);
4375 quotient = expand_binop (compute_mode, xor_optab,
4376 t4, t1, tquotient, 0,
4377 OPTAB_WIDEN);
4378 }
4379 }
4380 }
4381 }
4382 else
4383 {
4384 rtx nsign, t1, t2, t3, t4;
4385 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4386 op0, constm1_rtx), NULL_RTX);
4387 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4388 0, OPTAB_WIDEN);
4389 nsign = expand_shift
4390 (RSHIFT_EXPR, compute_mode, t2,
4391 build_int_cst (NULL_TREE, size - 1),
4392 NULL_RTX, 0);
4393 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4394 NULL_RTX);
4395 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4396 NULL_RTX, 0);
4397 if (t4)
4398 {
4399 rtx t5;
4400 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4401 NULL_RTX, 0);
4402 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4403 t4, t5),
4404 tquotient);
4405 }
4406 }
4407 }
4408
4409 if (quotient != 0)
4410 break;
4411 delete_insns_since (last);
4412
4413 /* Try using an instruction that produces both the quotient and
4414 remainder, using truncation. We can easily compensate the quotient
4415 or remainder to get floor rounding, once we have the remainder.
4416 Notice that we also compute the final remainder value here,
4417 and return the result right away. */
4418 if (target == 0 || GET_MODE (target) != compute_mode)
4419 target = gen_reg_rtx (compute_mode);
4420
4421 if (rem_flag)
4422 {
4423 remainder
4424 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4425 quotient = gen_reg_rtx (compute_mode);
4426 }
4427 else
4428 {
4429 quotient
4430 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4431 remainder = gen_reg_rtx (compute_mode);
4432 }
4433
4434 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4435 quotient, remainder, 0))
4436 {
4437 /* This could be computed with a branch-less sequence.
4438 Save that for later. */
4439 rtx tem;
4440 rtx label = gen_label_rtx ();
4441 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4442 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4443 NULL_RTX, 0, OPTAB_WIDEN);
4444 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4445 expand_dec (quotient, const1_rtx);
4446 expand_inc (remainder, op1);
4447 emit_label (label);
4448 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4449 }
4450
4451 /* No luck with division elimination or divmod. Have to do it
4452 by conditionally adjusting op0 *and* the result. */
4453 {
4454 rtx label1, label2, label3, label4, label5;
4455 rtx adjusted_op0;
4456 rtx tem;
4457
4458 quotient = gen_reg_rtx (compute_mode);
4459 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4460 label1 = gen_label_rtx ();
4461 label2 = gen_label_rtx ();
4462 label3 = gen_label_rtx ();
4463 label4 = gen_label_rtx ();
4464 label5 = gen_label_rtx ();
4465 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4466 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4467 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4468 quotient, 0, OPTAB_LIB_WIDEN);
4469 if (tem != quotient)
4470 emit_move_insn (quotient, tem);
4471 emit_jump_insn (gen_jump (label5));
4472 emit_barrier ();
4473 emit_label (label1);
4474 expand_inc (adjusted_op0, const1_rtx);
4475 emit_jump_insn (gen_jump (label4));
4476 emit_barrier ();
4477 emit_label (label2);
4478 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4479 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4480 quotient, 0, OPTAB_LIB_WIDEN);
4481 if (tem != quotient)
4482 emit_move_insn (quotient, tem);
4483 emit_jump_insn (gen_jump (label5));
4484 emit_barrier ();
4485 emit_label (label3);
4486 expand_dec (adjusted_op0, const1_rtx);
4487 emit_label (label4);
4488 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4489 quotient, 0, OPTAB_LIB_WIDEN);
4490 if (tem != quotient)
4491 emit_move_insn (quotient, tem);
4492 expand_dec (quotient, const1_rtx);
4493 emit_label (label5);
4494 }
4495 break;
4496
4497 case CEIL_DIV_EXPR:
4498 case CEIL_MOD_EXPR:
4499 if (unsignedp)
4500 {
4501 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4502 {
4503 rtx t1, t2, t3;
4504 unsigned HOST_WIDE_INT d = INTVAL (op1);
4505 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4506 build_int_cst (NULL_TREE, floor_log2 (d)),
4507 tquotient, 1);
4508 t2 = expand_binop (compute_mode, and_optab, op0,
4509 GEN_INT (d - 1),
4510 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4511 t3 = gen_reg_rtx (compute_mode);
4512 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4513 compute_mode, 1, 1);
4514 if (t3 == 0)
4515 {
4516 rtx lab;
4517 lab = gen_label_rtx ();
4518 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4519 expand_inc (t1, const1_rtx);
4520 emit_label (lab);
4521 quotient = t1;
4522 }
4523 else
4524 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4525 t1, t3),
4526 tquotient);
4527 break;
4528 }
4529
4530 /* Try using an instruction that produces both the quotient and
4531 remainder, using truncation. We can easily compensate the
4532 quotient or remainder to get ceiling rounding, once we have the
4533 remainder. Notice that we also compute the final remainder
4534 value here, and return the result right away. */
4535 if (target == 0 || GET_MODE (target) != compute_mode)
4536 target = gen_reg_rtx (compute_mode);
4537
4538 if (rem_flag)
4539 {
4540 remainder = (REG_P (target)
4541 ? target : gen_reg_rtx (compute_mode));
4542 quotient = gen_reg_rtx (compute_mode);
4543 }
4544 else
4545 {
4546 quotient = (REG_P (target)
4547 ? target : gen_reg_rtx (compute_mode));
4548 remainder = gen_reg_rtx (compute_mode);
4549 }
4550
4551 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4552 remainder, 1))
4553 {
4554 /* This could be computed with a branch-less sequence.
4555 Save that for later. */
4556 rtx label = gen_label_rtx ();
4557 do_cmp_and_jump (remainder, const0_rtx, EQ,
4558 compute_mode, label);
4559 expand_inc (quotient, const1_rtx);
4560 expand_dec (remainder, op1);
4561 emit_label (label);
4562 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4563 }
4564
4565 /* No luck with division elimination or divmod. Have to do it
4566 by conditionally adjusting op0 *and* the result. */
4567 {
4568 rtx label1, label2;
4569 rtx adjusted_op0, tem;
4570
4571 quotient = gen_reg_rtx (compute_mode);
4572 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4573 label1 = gen_label_rtx ();
4574 label2 = gen_label_rtx ();
4575 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4576 compute_mode, label1);
4577 emit_move_insn (quotient, const0_rtx);
4578 emit_jump_insn (gen_jump (label2));
4579 emit_barrier ();
4580 emit_label (label1);
4581 expand_dec (adjusted_op0, const1_rtx);
4582 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4583 quotient, 1, OPTAB_LIB_WIDEN);
4584 if (tem != quotient)
4585 emit_move_insn (quotient, tem);
4586 expand_inc (quotient, const1_rtx);
4587 emit_label (label2);
4588 }
4589 }
4590 else /* signed */
4591 {
4592 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4593 && INTVAL (op1) >= 0)
4594 {
4595 /* This is extremely similar to the code for the unsigned case
4596 above. For 2.7 we should merge these variants, but for
4597 2.6.1 I don't want to touch the code for unsigned since that
4598 gets used in C. The signed case will only be used by other
4599 languages (Ada). */
4600
4601 rtx t1, t2, t3;
4602 unsigned HOST_WIDE_INT d = INTVAL (op1);
4603 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4604 build_int_cst (NULL_TREE, floor_log2 (d)),
4605 tquotient, 0);
4606 t2 = expand_binop (compute_mode, and_optab, op0,
4607 GEN_INT (d - 1),
4608 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4609 t3 = gen_reg_rtx (compute_mode);
4610 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4611 compute_mode, 1, 1);
4612 if (t3 == 0)
4613 {
4614 rtx lab;
4615 lab = gen_label_rtx ();
4616 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4617 expand_inc (t1, const1_rtx);
4618 emit_label (lab);
4619 quotient = t1;
4620 }
4621 else
4622 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4623 t1, t3),
4624 tquotient);
4625 break;
4626 }
4627
4628 /* Try using an instruction that produces both the quotient and
4629 remainder, using truncation. We can easily compensate the
4630 quotient or remainder to get ceiling rounding, once we have the
4631 remainder. Notice that we also compute the final remainder
4632 value here, and return the result right away. */
4633 if (target == 0 || GET_MODE (target) != compute_mode)
4634 target = gen_reg_rtx (compute_mode);
4635 if (rem_flag)
4636 {
4637 remainder = (REG_P (target)
4638 ? target : gen_reg_rtx (compute_mode));
4639 quotient = gen_reg_rtx (compute_mode);
4640 }
4641 else
4642 {
4643 quotient = (REG_P (target)
4644 ? target : gen_reg_rtx (compute_mode));
4645 remainder = gen_reg_rtx (compute_mode);
4646 }
4647
4648 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4649 remainder, 0))
4650 {
4651 /* This could be computed with a branch-less sequence.
4652 Save that for later. */
4653 rtx tem;
4654 rtx label = gen_label_rtx ();
4655 do_cmp_and_jump (remainder, const0_rtx, EQ,
4656 compute_mode, label);
4657 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4658 NULL_RTX, 0, OPTAB_WIDEN);
4659 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4660 expand_inc (quotient, const1_rtx);
4661 expand_dec (remainder, op1);
4662 emit_label (label);
4663 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4664 }
4665
4666 /* No luck with division elimination or divmod. Have to do it
4667 by conditionally adjusting op0 *and* the result. */
4668 {
4669 rtx label1, label2, label3, label4, label5;
4670 rtx adjusted_op0;
4671 rtx tem;
4672
4673 quotient = gen_reg_rtx (compute_mode);
4674 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4675 label1 = gen_label_rtx ();
4676 label2 = gen_label_rtx ();
4677 label3 = gen_label_rtx ();
4678 label4 = gen_label_rtx ();
4679 label5 = gen_label_rtx ();
4680 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4681 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4682 compute_mode, label1);
4683 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4684 quotient, 0, OPTAB_LIB_WIDEN);
4685 if (tem != quotient)
4686 emit_move_insn (quotient, tem);
4687 emit_jump_insn (gen_jump (label5));
4688 emit_barrier ();
4689 emit_label (label1);
4690 expand_dec (adjusted_op0, const1_rtx);
4691 emit_jump_insn (gen_jump (label4));
4692 emit_barrier ();
4693 emit_label (label2);
4694 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4695 compute_mode, label3);
4696 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4697 quotient, 0, OPTAB_LIB_WIDEN);
4698 if (tem != quotient)
4699 emit_move_insn (quotient, tem);
4700 emit_jump_insn (gen_jump (label5));
4701 emit_barrier ();
4702 emit_label (label3);
4703 expand_inc (adjusted_op0, const1_rtx);
4704 emit_label (label4);
4705 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4706 quotient, 0, OPTAB_LIB_WIDEN);
4707 if (tem != quotient)
4708 emit_move_insn (quotient, tem);
4709 expand_inc (quotient, const1_rtx);
4710 emit_label (label5);
4711 }
4712 }
4713 break;
4714
4715 case EXACT_DIV_EXPR:
4716 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4717 {
4718 HOST_WIDE_INT d = INTVAL (op1);
4719 unsigned HOST_WIDE_INT ml;
4720 int pre_shift;
4721 rtx t1;
4722
4723 pre_shift = floor_log2 (d & -d);
4724 ml = invert_mod2n (d >> pre_shift, size);
4725 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4726 build_int_cst (NULL_TREE, pre_shift),
4727 NULL_RTX, unsignedp);
4728 quotient = expand_mult (compute_mode, t1,
4729 gen_int_mode (ml, compute_mode),
4730 NULL_RTX, 1);
4731
4732 insn = get_last_insn ();
4733 set_unique_reg_note (insn,
4734 REG_EQUAL,
4735 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4736 compute_mode,
4737 op0, op1));
4738 }
4739 break;
4740
4741 case ROUND_DIV_EXPR:
4742 case ROUND_MOD_EXPR:
4743 if (unsignedp)
4744 {
4745 rtx tem;
4746 rtx label;
4747 label = gen_label_rtx ();
4748 quotient = gen_reg_rtx (compute_mode);
4749 remainder = gen_reg_rtx (compute_mode);
4750 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4751 {
4752 rtx tem;
4753 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4754 quotient, 1, OPTAB_LIB_WIDEN);
4755 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4756 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4757 remainder, 1, OPTAB_LIB_WIDEN);
4758 }
4759 tem = plus_constant (op1, -1);
4760 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4761 build_int_cst (NULL_TREE, 1),
4762 NULL_RTX, 1);
4763 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4764 expand_inc (quotient, const1_rtx);
4765 expand_dec (remainder, op1);
4766 emit_label (label);
4767 }
4768 else
4769 {
4770 rtx abs_rem, abs_op1, tem, mask;
4771 rtx label;
4772 label = gen_label_rtx ();
4773 quotient = gen_reg_rtx (compute_mode);
4774 remainder = gen_reg_rtx (compute_mode);
4775 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4776 {
4777 rtx tem;
4778 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4779 quotient, 0, OPTAB_LIB_WIDEN);
4780 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4781 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4782 remainder, 0, OPTAB_LIB_WIDEN);
4783 }
4784 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4785 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4786 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4787 build_int_cst (NULL_TREE, 1),
4788 NULL_RTX, 1);
4789 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4790 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4791 NULL_RTX, 0, OPTAB_WIDEN);
4792 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4793 build_int_cst (NULL_TREE, size - 1),
4794 NULL_RTX, 0);
4795 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4796 NULL_RTX, 0, OPTAB_WIDEN);
4797 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4798 NULL_RTX, 0, OPTAB_WIDEN);
4799 expand_inc (quotient, tem);
4800 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4801 NULL_RTX, 0, OPTAB_WIDEN);
4802 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4803 NULL_RTX, 0, OPTAB_WIDEN);
4804 expand_dec (remainder, tem);
4805 emit_label (label);
4806 }
4807 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4808
4809 default:
4810 gcc_unreachable ();
4811 }
4812
4813 if (quotient == 0)
4814 {
4815 if (target && GET_MODE (target) != compute_mode)
4816 target = 0;
4817
4818 if (rem_flag)
4819 {
4820 /* Try to produce the remainder without producing the quotient.
4821 If we seem to have a divmod pattern that does not require widening,
4822 don't try widening here. We should really have a WIDEN argument
4823 to expand_twoval_binop, since what we'd really like to do here is
4824 1) try a mod insn in compute_mode
4825 2) try a divmod insn in compute_mode
4826 3) try a div insn in compute_mode and multiply-subtract to get
4827 remainder
4828 4) try the same things with widening allowed. */
4829 remainder
4830 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4831 op0, op1, target,
4832 unsignedp,
4833 ((optab_handler (optab2, compute_mode)
4834 != CODE_FOR_nothing)
4835 ? OPTAB_DIRECT : OPTAB_WIDEN));
4836 if (remainder == 0)
4837 {
4838 /* No luck there. Can we do remainder and divide at once
4839 without a library call? */
4840 remainder = gen_reg_rtx (compute_mode);
4841 if (! expand_twoval_binop ((unsignedp
4842 ? udivmod_optab
4843 : sdivmod_optab),
4844 op0, op1,
4845 NULL_RTX, remainder, unsignedp))
4846 remainder = 0;
4847 }
4848
4849 if (remainder)
4850 return gen_lowpart (mode, remainder);
4851 }
4852
4853 /* Produce the quotient. Try a quotient insn, but not a library call.
4854 If we have a divmod in this mode, use it in preference to widening
4855 the div (for this test we assume it will not fail). Note that optab2
4856 is set to whichever of the two optabs the call below will use. */
4857 quotient
4858 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4859 op0, op1, rem_flag ? NULL_RTX : target,
4860 unsignedp,
4861 ((optab_handler (optab2, compute_mode)
4862 != CODE_FOR_nothing)
4863 ? OPTAB_DIRECT : OPTAB_WIDEN));
4864
4865 if (quotient == 0)
4866 {
4867 /* No luck there. Try a quotient-and-remainder insn,
4868 keeping the quotient alone. */
4869 quotient = gen_reg_rtx (compute_mode);
4870 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4871 op0, op1,
4872 quotient, NULL_RTX, unsignedp))
4873 {
4874 quotient = 0;
4875 if (! rem_flag)
4876 /* Still no luck. If we are not computing the remainder,
4877 use a library call for the quotient. */
4878 quotient = sign_expand_binop (compute_mode,
4879 udiv_optab, sdiv_optab,
4880 op0, op1, target,
4881 unsignedp, OPTAB_LIB_WIDEN);
4882 }
4883 }
4884 }
4885
4886 if (rem_flag)
4887 {
4888 if (target && GET_MODE (target) != compute_mode)
4889 target = 0;
4890
4891 if (quotient == 0)
4892 {
4893 /* No divide instruction either. Use library for remainder. */
4894 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4895 op0, op1, target,
4896 unsignedp, OPTAB_LIB_WIDEN);
4897 /* No remainder function. Try a quotient-and-remainder
4898 function, keeping the remainder. */
4899 if (!remainder)
4900 {
4901 remainder = gen_reg_rtx (compute_mode);
4902 if (!expand_twoval_binop_libfunc
4903 (unsignedp ? udivmod_optab : sdivmod_optab,
4904 op0, op1,
4905 NULL_RTX, remainder,
4906 unsignedp ? UMOD : MOD))
4907 remainder = NULL_RTX;
4908 }
4909 }
4910 else
4911 {
4912 /* We divided. Now finish doing X - Y * (X / Y). */
4913 remainder = expand_mult (compute_mode, quotient, op1,
4914 NULL_RTX, unsignedp);
4915 remainder = expand_binop (compute_mode, sub_optab, op0,
4916 remainder, target, unsignedp,
4917 OPTAB_LIB_WIDEN);
4918 }
4919 }
4920
4921 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4922 }
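
/* Illustrative sketch (not part of GCC): the run-time shape of the signed
   TRUNC_DIV_EXPR division-by-constant path above, for a positive 32-bit
   divisor D whose chosen multiplier fits in 31 bits.  MULTIPLIER and
   POST_SHIFT stand for the values choose_multiplier computes at compile
   time and are hypothetical parameters here.  Assumes the <stdint.h>
   types and arithmetic right shifts of negative values.  */
#if 0
static int32_t
example_sdiv_by_const (int32_t n, int32_t multiplier, int post_shift)
{
  /* Signed high half of the 32x32->64 bit product.  */
  int32_t hi = (int32_t) (((int64_t) n * multiplier) >> 32);

  /* Shift, then subtract the sign mask of N (equivalently, add 1 when
     N is negative) to obtain the truncated quotient.  */
  return (hi >> post_shift) - (n >> 31);
}
#endif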
4923 \f
4924 /* Return a tree node with data type TYPE, describing the value of X.
4925 Usually this is a VAR_DECL, if there is no obvious better choice.
4926 X may be an expression; however, we only support those expressions
4927 generated by loop.c. */
4928
4929 tree
4930 make_tree (tree type, rtx x)
4931 {
4932 tree t;
4933
4934 switch (GET_CODE (x))
4935 {
4936 case CONST_INT:
4937 {
4938 HOST_WIDE_INT hi = 0;
4939
4940 if (INTVAL (x) < 0
4941 && !(TYPE_UNSIGNED (type)
4942 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4943 < HOST_BITS_PER_WIDE_INT)))
4944 hi = -1;
4945
4946 t = build_int_cst_wide (type, INTVAL (x), hi);
4947
4948 return t;
4949 }
4950
4951 case CONST_DOUBLE:
4952 if (GET_MODE (x) == VOIDmode)
4953 t = build_int_cst_wide (type,
4954 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4955 else
4956 {
4957 REAL_VALUE_TYPE d;
4958
4959 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4960 t = build_real (type, d);
4961 }
4962
4963 return t;
4964
4965 case CONST_VECTOR:
4966 {
4967 int units = CONST_VECTOR_NUNITS (x);
4968 tree itype = TREE_TYPE (type);
4969 tree t = NULL_TREE;
4970 int i;
4971
4972
4973 /* Build a tree with vector elements. */
4974 for (i = units - 1; i >= 0; --i)
4975 {
4976 rtx elt = CONST_VECTOR_ELT (x, i);
4977 t = tree_cons (NULL_TREE, make_tree (itype, elt), t);
4978 }
4979
4980 return build_vector (type, t);
4981 }
4982
4983 case PLUS:
4984 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4985 make_tree (type, XEXP (x, 1)));
4986
4987 case MINUS:
4988 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4989 make_tree (type, XEXP (x, 1)));
4990
4991 case NEG:
4992 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4993
4994 case MULT:
4995 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4996 make_tree (type, XEXP (x, 1)));
4997
4998 case ASHIFT:
4999 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5000 make_tree (type, XEXP (x, 1)));
5001
5002 case LSHIFTRT:
5003 t = unsigned_type_for (type);
5004 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5005 make_tree (t, XEXP (x, 0)),
5006 make_tree (type, XEXP (x, 1))));
5007
5008 case ASHIFTRT:
5009 t = signed_type_for (type);
5010 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5011 make_tree (t, XEXP (x, 0)),
5012 make_tree (type, XEXP (x, 1))));
5013
5014 case DIV:
5015 if (TREE_CODE (type) != REAL_TYPE)
5016 t = signed_type_for (type);
5017 else
5018 t = type;
5019
5020 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5021 make_tree (t, XEXP (x, 0)),
5022 make_tree (t, XEXP (x, 1))));
5023 case UDIV:
5024 t = unsigned_type_for (type);
5025 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5026 make_tree (t, XEXP (x, 0)),
5027 make_tree (t, XEXP (x, 1))));
5028
5029 case SIGN_EXTEND:
5030 case ZERO_EXTEND:
5031 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5032 GET_CODE (x) == ZERO_EXTEND);
5033 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5034
5035 case CONST:
5036 return make_tree (type, XEXP (x, 0));
5037
5038 case SYMBOL_REF:
5039 t = SYMBOL_REF_DECL (x);
5040 if (t)
5041 return fold_convert (type, build_fold_addr_expr (t));
5042 /* else fall through. */
5043
5044 default:
5045 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5046
5047 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5048 address mode to pointer mode. */
5049 if (POINTER_TYPE_P (type))
5050 x = convert_memory_address_addr_space
5051 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5052
5053 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5054 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5055 t->decl_with_rtl.rtl = x;
5056
5057 return t;
5058 }
5059 }
5060 \f
5061 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5062 and returning TARGET.
5063
5064 If TARGET is 0, a pseudo-register or constant is returned. */
5065
5066 rtx
5067 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5068 {
5069 rtx tem = 0;
5070
5071 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5072 tem = simplify_binary_operation (AND, mode, op0, op1);
5073 if (tem == 0)
5074 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5075
5076 if (target == 0)
5077 target = tem;
5078 else if (tem != target)
5079 emit_move_insn (target, tem);
5080 return target;
5081 }
5082
5083 /* Helper function for emit_store_flag. */
5084 static rtx
5085 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5086 enum machine_mode mode, enum machine_mode compare_mode,
5087 int unsignedp, rtx x, rtx y, int normalizep,
5088 enum machine_mode target_mode)
5089 {
5090 rtx op0, last, comparison, subtarget, pattern;
5091 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5092
5093 last = get_last_insn ();
5094 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5095 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5096 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5097 if (!x || !y
5098 || !insn_data[icode].operand[2].predicate
5099 (x, insn_data[icode].operand[2].mode)
5100 || !insn_data[icode].operand[3].predicate
5101 (y, insn_data[icode].operand[3].mode)
5102 || !insn_data[icode].operand[1].predicate (comparison, VOIDmode))
5103 {
5104 delete_insns_since (last);
5105 return NULL_RTX;
5106 }
5107
5108 if (target_mode == VOIDmode)
5109 target_mode = result_mode;
5110 if (!target)
5111 target = gen_reg_rtx (target_mode);
5112
5113 if (optimize
5114 || !(insn_data[(int) icode].operand[0].predicate (target, result_mode)))
5115 subtarget = gen_reg_rtx (result_mode);
5116 else
5117 subtarget = target;
5118
5119 pattern = GEN_FCN (icode) (subtarget, comparison, x, y);
5120 if (!pattern)
5121 return NULL_RTX;
5122 emit_insn (pattern);
5123
5124 /* If we are converting to a wider mode, first convert to
5125 TARGET_MODE, then normalize. This produces better combining
5126 opportunities on machines that have a SIGN_EXTRACT when we are
5127 testing a single bit. This mostly benefits the 68k.
5128
5129 If STORE_FLAG_VALUE does not have the sign bit set when
5130 interpreted in MODE, we can do this conversion as unsigned, which
5131 is usually more efficient. */
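  /* Worked example (added for illustration): with STORE_FLAG_VALUE == 1
     and result_mode == QImode, bit 7 of the value 1 is clear, so the
     widening below is requested as a zero extension; on a target whose
     STORE_FLAG_VALUE is -1 the sign bit is set, and a sign extension is
     used instead.  */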
5132 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5133 {
5134 convert_move (target, subtarget,
5135 (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT)
5136 && 0 == (STORE_FLAG_VALUE
5137 & ((HOST_WIDE_INT) 1
5138 << (GET_MODE_BITSIZE (result_mode) -1))));
5139 op0 = target;
5140 result_mode = target_mode;
5141 }
5142 else
5143 op0 = subtarget;
5144
5145 /* If we want to keep subexpressions around, don't reuse our last
5146 target. */
5147 if (optimize)
5148 subtarget = 0;
5149
5150 /* Now normalize to the proper value in MODE. Sometimes we don't
5151 have to do anything. */
5152 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5153 ;
5154 /* STORE_FLAG_VALUE might be the most negative number, so write
5155 the comparison this way to avoid a compile-time warning. */
5156 else if (- normalizep == STORE_FLAG_VALUE)
5157 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5158
5159 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5160 it hard to use a value of just the sign bit due to ANSI integer
5161 constant typing rules. */
5162 else if (GET_MODE_BITSIZE (result_mode) <= HOST_BITS_PER_WIDE_INT
5163 && (STORE_FLAG_VALUE
5164 & ((HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (result_mode) - 1))))
5165 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5166 size_int (GET_MODE_BITSIZE (result_mode) - 1), subtarget,
5167 normalizep == 1);
5168 else
5169 {
5170 gcc_assert (STORE_FLAG_VALUE & 1);
5171
5172 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5173 if (normalizep == -1)
5174 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5175 }
5176
5177 /* If we were converting to a smaller mode, do the conversion now. */
5178 if (target_mode != result_mode)
5179 {
5180 convert_move (target, op0, 0);
5181 return target;
5182 }
5183 else
5184 return op0;
5185 }
5186
5187
5188 /* A subroutine of emit_store_flag only including "tricks" that do not
5189 need a recursive call. These are kept separate to avoid infinite
5190 loops. */
5191
5192 static rtx
5193 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5194 enum machine_mode mode, int unsignedp, int normalizep,
5195 enum machine_mode target_mode)
5196 {
5197 rtx subtarget;
5198 enum insn_code icode;
5199 enum machine_mode compare_mode;
5200 enum mode_class mclass;
5201 enum rtx_code scode;
5202 rtx tem;
5203
5204 if (unsignedp)
5205 code = unsigned_condition (code);
5206 scode = swap_condition (code);
5207
5208 /* If one operand is constant, make it the second one. Only do this
5209 if the other operand is not constant as well. */
5210
5211 if (swap_commutative_operands_p (op0, op1))
5212 {
5213 tem = op0;
5214 op0 = op1;
5215 op1 = tem;
5216 code = swap_condition (code);
5217 }
5218
5219 if (mode == VOIDmode)
5220 mode = GET_MODE (op0);
5221
5222 /* For some comparisons with 1 and -1, we can convert this to
5223 comparisons with zero. This will often produce more opportunities for
5224 store-flag insns. */
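  /* For instance (added commentary): "A < 1" becomes "A <= 0", "A > -1"
     becomes "A >= 0", and the unsigned "A >= 1" / "A < 1" become
     "A != 0" / "A == 0", as the switch below shows.  */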
5225
5226 switch (code)
5227 {
5228 case LT:
5229 if (op1 == const1_rtx)
5230 op1 = const0_rtx, code = LE;
5231 break;
5232 case LE:
5233 if (op1 == constm1_rtx)
5234 op1 = const0_rtx, code = LT;
5235 break;
5236 case GE:
5237 if (op1 == const1_rtx)
5238 op1 = const0_rtx, code = GT;
5239 break;
5240 case GT:
5241 if (op1 == constm1_rtx)
5242 op1 = const0_rtx, code = GE;
5243 break;
5244 case GEU:
5245 if (op1 == const1_rtx)
5246 op1 = const0_rtx, code = NE;
5247 break;
5248 case LTU:
5249 if (op1 == const1_rtx)
5250 op1 = const0_rtx, code = EQ;
5251 break;
5252 default:
5253 break;
5254 }
5255
5256 /* If we are comparing a double-word integer with zero or -1, we can
5257 convert the comparison into one involving a single word. */
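  /* Example (added for illustration): on a 32-bit target, a DImode
     "x == 0" can be tested as "(low_word | high_word) == 0" and
     "x == -1" as "(low_word & high_word) == -1", while "x < 0" only
     needs the sign bit of the high word.  */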
5258 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5259 && GET_MODE_CLASS (mode) == MODE_INT
5260 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5261 {
5262 if ((code == EQ || code == NE)
5263 && (op1 == const0_rtx || op1 == constm1_rtx))
5264 {
5265 rtx op00, op01;
5266
5267 /* Do a logical OR or AND of the two words and compare the
5268 result. */
5269 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5270 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5271 tem = expand_binop (word_mode,
5272 op1 == const0_rtx ? ior_optab : and_optab,
5273 op00, op01, NULL_RTX, unsignedp,
5274 OPTAB_DIRECT);
5275
5276 if (tem != 0)
5277 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5278 unsignedp, normalizep);
5279 }
5280 else if ((code == LT || code == GE) && op1 == const0_rtx)
5281 {
5282 rtx op0h;
5283
5284 /* If testing the sign bit, can just test on high word. */
5285 op0h = simplify_gen_subreg (word_mode, op0, mode,
5286 subreg_highpart_offset (word_mode,
5287 mode));
5288 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5289 unsignedp, normalizep);
5290 }
5291 else
5292 tem = NULL_RTX;
5293
5294 if (tem)
5295 {
5296 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5297 return tem;
5298 if (!target)
5299 target = gen_reg_rtx (target_mode);
5300
5301 convert_move (target, tem,
5302 0 == ((normalizep ? normalizep : STORE_FLAG_VALUE)
5303 & ((HOST_WIDE_INT) 1
5304 << (GET_MODE_BITSIZE (word_mode) -1))));
5305 return target;
5306 }
5307 }
5308
5309 /* If this is A < 0 or A >= 0, we can do this by taking the ones
5310 complement of A (for GE) and shifting the sign bit to the low bit. */
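  /* Sketch of the idea (added commentary): for SImode, "A < 0" with a
     0/1 result is just the logical shift "(unsigned) A >> 31", while a
     0/-1 result uses the arithmetic shift "A >> 31"; for "A >= 0" the
     operand is first one's-complemented, as the GE case below does.  */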
5311 if (op1 == const0_rtx && (code == LT || code == GE)
5312 && GET_MODE_CLASS (mode) == MODE_INT
5313 && (normalizep || STORE_FLAG_VALUE == 1
5314 || (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5315 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5316 == ((unsigned HOST_WIDE_INT) 1
5317 << (GET_MODE_BITSIZE (mode) - 1))))))
5318 {
5319 subtarget = target;
5320
5321 if (!target)
5322 target_mode = mode;
5323
5324 /* If the result is to be wider than OP0, it is best to convert it
5325 first. If it is to be narrower, it is *incorrect* to convert it
5326 first. */
5327 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5328 {
5329 op0 = convert_modes (target_mode, mode, op0, 0);
5330 mode = target_mode;
5331 }
5332
5333 if (target_mode != mode)
5334 subtarget = 0;
5335
5336 if (code == GE)
5337 op0 = expand_unop (mode, one_cmpl_optab, op0,
5338 ((STORE_FLAG_VALUE == 1 || normalizep)
5339 ? 0 : subtarget), 0);
5340
5341 if (STORE_FLAG_VALUE == 1 || normalizep)
5342 /* If we are supposed to produce a 0/1 value, we want to do
5343 a logical shift from the sign bit to the low-order bit; for
5344 a -1/0 value, we do an arithmetic shift. */
5345 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5346 size_int (GET_MODE_BITSIZE (mode) - 1),
5347 subtarget, normalizep != -1);
5348
5349 if (mode != target_mode)
5350 op0 = convert_modes (target_mode, mode, op0, 0);
5351
5352 return op0;
5353 }
5354
5355 mclass = GET_MODE_CLASS (mode);
5356 for (compare_mode = mode; compare_mode != VOIDmode;
5357 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5358 {
5359 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5360 icode = optab_handler (cstore_optab, optab_mode);
5361 if (icode != CODE_FOR_nothing)
5362 {
5363 do_pending_stack_adjust ();
5364 tem = emit_cstore (target, icode, code, mode, compare_mode,
5365 unsignedp, op0, op1, normalizep, target_mode);
5366 if (tem)
5367 return tem;
5368
5369 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5370 {
5371 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5372 unsignedp, op1, op0, normalizep, target_mode);
5373 if (tem)
5374 return tem;
5375 }
5376 break;
5377 }
5378 }
5379
5380 return 0;
5381 }
5382
5383 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5384 and storing in TARGET. Normally return TARGET.
5385 Return 0 if that cannot be done.
5386
5387 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5388 it is VOIDmode, they cannot both be CONST_INT.
5389
5390 UNSIGNEDP is for the case where we have to widen the operands
5391 to perform the operation. It says to use zero-extension.
5392
5393 NORMALIZEP is 1 if we should convert the result to be either zero
5394 or one. NORMALIZEP is -1 if we should convert the result to be
5395 either zero or -1. If NORMALIZEP is zero, the result will be left
5396 "raw" out of the scc insn. */
5397
5398 rtx
5399 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5400 enum machine_mode mode, int unsignedp, int normalizep)
5401 {
5402 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5403 enum rtx_code rcode;
5404 rtx subtarget;
5405 rtx tem, last, trueval;
5406
5407 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5408 target_mode);
5409 if (tem)
5410 return tem;
5411
5412 /* If we reached here, we can't do this with a scc insn; however, there
5413 are some comparisons that can be done in other ways. Don't do any
5414 of these cases if branches are very cheap. */
5415 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5416 return 0;
5417
5418 /* See what we need to return. We can only return a 1, -1, or the
5419 sign bit. */
5420
5421 if (normalizep == 0)
5422 {
5423 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5424 normalizep = STORE_FLAG_VALUE;
5425
5426 else if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT
5427 && ((STORE_FLAG_VALUE & GET_MODE_MASK (mode))
5428 == (unsigned HOST_WIDE_INT) 1 << (GET_MODE_BITSIZE (mode) - 1)))
5429 ;
5430 else
5431 return 0;
5432 }
5433
5434 last = get_last_insn ();
5435
5436 /* If optimizing, use different pseudo registers for each insn, instead
5437 of reusing the same pseudo. This leads to better CSE, but slows
5438 down the compiler, since there are more pseudos. */
5439 subtarget = (!optimize
5440 && (target_mode == mode)) ? target : NULL_RTX;
5441 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5442
5443 /* For floating-point comparisons, try the reverse comparison or try
5444 changing the "orderedness" of the comparison. */
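  /* Example (added commentary): with STORE_FLAG_VALUE == 1 and
     NORMALIZEP == 1, an "x < y" that the target cannot store directly
     may be computed as the reverse comparison UNGE, with the 0/1 result
     then flipped by an XOR against 1; when STORE_FLAG_VALUE and
     NORMALIZEP have opposite signs (e.g. 1 and -1), adding NORMALIZEP
     to the reversed result achieves the same flip.  */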
5445 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5446 {
5447 enum rtx_code first_code;
5448 bool and_them;
5449
5450 rcode = reverse_condition_maybe_unordered (code);
5451 if (can_compare_p (rcode, mode, ccp_store_flag)
5452 && (code == ORDERED || code == UNORDERED
5453 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5454 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5455 {
5456 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5457 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5458
5459 /* For the reverse comparison, use either an addition or a XOR. */
5460 if (want_add
5461 && rtx_cost (GEN_INT (normalizep), PLUS,
5462 optimize_insn_for_speed_p ()) == 0)
5463 {
5464 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5465 STORE_FLAG_VALUE, target_mode);
5466 if (tem)
5467 return expand_binop (target_mode, add_optab, tem,
5468 GEN_INT (normalizep),
5469 target, 0, OPTAB_WIDEN);
5470 }
5471 else if (!want_add
5472 && rtx_cost (trueval, XOR,
5473 optimize_insn_for_speed_p ()) == 0)
5474 {
5475 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5476 normalizep, target_mode);
5477 if (tem)
5478 return expand_binop (target_mode, xor_optab, tem, trueval,
5479 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5480 }
5481 }
5482
5483 delete_insns_since (last);
5484
5485 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5486 if (code == ORDERED || code == UNORDERED)
5487 return 0;
5488
5489 and_them = split_comparison (code, mode, &first_code, &code);
5490
5491 /* If there are no NaNs, the first comparison should always fall through.
5492 Effectively change the comparison to the other one. */
5493 if (!HONOR_NANS (mode))
5494 {
5495 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5496 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5497 target_mode);
5498 }
5499
5500 #ifdef HAVE_conditional_move
5501 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5502 conditional move. */
5503 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5504 normalizep, target_mode);
5505 if (tem == 0)
5506 return 0;
5507
5508 if (and_them)
5509 tem = emit_conditional_move (target, code, op0, op1, mode,
5510 tem, const0_rtx, GET_MODE (tem), 0);
5511 else
5512 tem = emit_conditional_move (target, code, op0, op1, mode,
5513 trueval, tem, GET_MODE (tem), 0);
5514
5515 if (tem == 0)
5516 delete_insns_since (last);
5517 return tem;
5518 #else
5519 return 0;
5520 #endif
5521 }
5522
5523 /* The remaining tricks only apply to integer comparisons. */
5524
5525 if (GET_MODE_CLASS (mode) != MODE_INT)
5526 return 0;
5527
5528 /* If this is an equality comparison of integers, we can try to exclusive-or
5529 (or subtract) the two operands and use a recursive call to try the
5530 comparison with zero. Don't do any of these cases if branches are
5531 very cheap. */
5532
5533 if ((code == EQ || code == NE) && op1 != const0_rtx)
5534 {
5535 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5536 OPTAB_WIDEN);
5537
5538 if (tem == 0)
5539 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5540 OPTAB_WIDEN);
5541 if (tem != 0)
5542 tem = emit_store_flag (target, code, tem, const0_rtx,
5543 mode, unsignedp, normalizep);
5544 if (tem != 0)
5545 return tem;
5546
5547 delete_insns_since (last);
5548 }
5549
5550 /* For integer comparisons, try the reverse comparison. However, for
5551 small X, and if we'd have to extend anyway, implementing "X != 0"
5552 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5553 rcode = reverse_condition (code);
5554 if (can_compare_p (rcode, mode, ccp_store_flag)
5555 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5556 && code == NE
5557 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5558 && op1 == const0_rtx))
5559 {
5560 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5561 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5562
5563 /* Again, for the reverse comparison, use either an addition or a XOR. */
5564 if (want_add
5565 && rtx_cost (GEN_INT (normalizep), PLUS,
5566 optimize_insn_for_speed_p ()) == 0)
5567 {
5568 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5569 STORE_FLAG_VALUE, target_mode);
5570 if (tem != 0)
5571 tem = expand_binop (target_mode, add_optab, tem,
5572 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5573 }
5574 else if (!want_add
5575 && rtx_cost (trueval, XOR,
5576 optimize_insn_for_speed_p ()) == 0)
5577 {
5578 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5579 normalizep, target_mode);
5580 if (tem != 0)
5581 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5582 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5583 }
5584
5585 if (tem != 0)
5586 return tem;
5587 delete_insns_since (last);
5588 }
5589
5590 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5591 the constant zero. Reject all other comparisons at this point. Only
5592 do LE and GT if branches are expensive since they are expensive on
5593 2-operand machines. */
5594
5595 if (op1 != const0_rtx
5596 || (code != EQ && code != NE
5597 && (BRANCH_COST (optimize_insn_for_speed_p (),
5598 false) <= 1 || (code != LE && code != GT))))
5599 return 0;
5600
5601 /* Try to put the result of the comparison in the sign bit. Assume we can't
5602 do the necessary operation below. */
5603
5604 tem = 0;
5605
5606 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5607 the sign bit set. */
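  /* Worked example (added for illustration, 8-bit values): A == 3 gives
     3 | 2 = 3, sign bit clear; A == 0 gives 0 | -1 = -1, sign bit set;
     A == -4 gives -4 | -5 = -1, sign bit set.  So the sign bit of
     A | (A - 1) is set exactly when A <= 0.  */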
5608
5609 if (code == LE)
5610 {
5611 /* This is destructive, so SUBTARGET can't be OP0. */
5612 if (rtx_equal_p (subtarget, op0))
5613 subtarget = 0;
5614
5615 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5616 OPTAB_WIDEN);
5617 if (tem)
5618 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5619 OPTAB_WIDEN);
5620 }
5621
5622 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5623 number of bits in the mode of OP0, minus one. */
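  /* Worked example (added for illustration, 8-bit values, BITS == 7):
     A == 5 gives (5 >> 7) - 5 = -5, sign bit set; A == 0 gives 0;
     A == -3 gives (-3 >> 7) - (-3) = -1 + 3 = 2, sign bit clear.  So the
     sign bit of the result is set exactly when A > 0.  */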
5624
5625 if (code == GT)
5626 {
5627 if (rtx_equal_p (subtarget, op0))
5628 subtarget = 0;
5629
5630 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5631 size_int (GET_MODE_BITSIZE (mode) - 1),
5632 subtarget, 0);
5633 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5634 OPTAB_WIDEN);
5635 }
5636
5637 if (code == EQ || code == NE)
5638 {
5639 /* For EQ or NE, one way to do the comparison is to apply an operation
5640 that converts the operand into a positive number if it is nonzero
5641 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5642 for NE we negate. This puts the result in the sign bit. Then we
5643 normalize with a shift, if needed.
5644
5645 Two operations that can do the above actions are ABS and FFS, so try
5646 them. If that doesn't work, and MODE is smaller than a full word,
5647 we can use zero-extension to the wider mode (an unsigned conversion)
5648 as the operation. */
5649
5650 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5651 that is compensated by the subsequent overflow when subtracting
5652 one / negating. */
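  /* Worked example (added for illustration): for NE with A == 3, ABS
     gives 3 and negating gives -3, sign bit set; with A == 0 the result
     stays 0, sign bit clear.  For EQ, subtracting 1 instead gives 2
     (clear) and -1 (set) respectively, so in both cases the sign bit
     carries the answer.  */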
5653
5654 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5655 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5656 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5657 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5658 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5659 {
5660 tem = convert_modes (word_mode, mode, op0, 1);
5661 mode = word_mode;
5662 }
5663
5664 if (tem != 0)
5665 {
5666 if (code == EQ)
5667 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5668 0, OPTAB_WIDEN);
5669 else
5670 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5671 }
5672
5673 /* If we couldn't do it that way, for NE we can "or" the two's complement
5674 of the value with itself. For EQ, we take the one's complement of
5675 that "or", which is an extra insn, so we only handle EQ if branches
5676 are expensive. */
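  /* Worked example (added for illustration): (-A) | A has the sign bit
     set exactly when A != 0; e.g. A == 6 gives -6 | 6 = -2 (set) and
     A == 0 gives 0 (clear).  For EQ the extra one's complement flips
     that bit.  */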
5677
5678 if (tem == 0
5679 && (code == NE
5680 || BRANCH_COST (optimize_insn_for_speed_p (),
5681 false) > 1))
5682 {
5683 if (rtx_equal_p (subtarget, op0))
5684 subtarget = 0;
5685
5686 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5687 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5688 OPTAB_WIDEN);
5689
5690 if (tem && code == EQ)
5691 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5692 }
5693 }
5694
5695 if (tem && normalizep)
5696 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5697 size_int (GET_MODE_BITSIZE (mode) - 1),
5698 subtarget, normalizep == 1);
5699
5700 if (tem)
5701 {
5702 if (!target)
5703 ;
5704 else if (GET_MODE (tem) != target_mode)
5705 {
5706 convert_move (target, tem, 0);
5707 tem = target;
5708 }
5709 else if (!subtarget)
5710 {
5711 emit_move_insn (target, tem);
5712 tem = target;
5713 }
5714 }
5715 else
5716 delete_insns_since (last);
5717
5718 return tem;
5719 }
5720
5721 /* Like emit_store_flag, but always succeeds. */
5722
5723 rtx
5724 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5725 enum machine_mode mode, int unsignedp, int normalizep)
5726 {
5727 rtx tem, label;
5728 rtx trueval, falseval;
5729
5730 /* First see if emit_store_flag can do the job. */
5731 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5732 if (tem != 0)
5733 return tem;
5734
5735 if (!target)
5736 target = gen_reg_rtx (word_mode);
5737
5738 /* If this failed, we have to do this with set/compare/jump/set code.
5739 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
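  /* Sketch of the emitted sequence for that case (added commentary):

       if (target == 0) goto L;
       target = trueval;
     L:

     i.e. a jump over the store stands in for the unavailable scc insn.  */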
5740 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5741 if (code == NE
5742 && GET_MODE_CLASS (mode) == MODE_INT
5743 && REG_P (target)
5744 && op0 == target
5745 && op1 == const0_rtx)
5746 {
5747 label = gen_label_rtx ();
5748 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5749 mode, NULL_RTX, NULL_RTX, label, -1);
5750 emit_move_insn (target, trueval);
5751 emit_label (label);
5752 return target;
5753 }
5754
5755 if (!REG_P (target)
5756 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5757 target = gen_reg_rtx (GET_MODE (target));
5758
5759 /* Jump in the right direction if the target cannot implement CODE
5760 but can jump on its reverse condition. */
5761 falseval = const0_rtx;
5762 if (! can_compare_p (code, mode, ccp_jump)
5763 && (! FLOAT_MODE_P (mode)
5764 || code == ORDERED || code == UNORDERED
5765 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5766 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5767 {
5768 enum rtx_code rcode;
5769 if (FLOAT_MODE_P (mode))
5770 rcode = reverse_condition_maybe_unordered (code);
5771 else
5772 rcode = reverse_condition (code);
5773
5774 /* Canonicalize to UNORDERED for the libcall. */
5775 if (can_compare_p (rcode, mode, ccp_jump)
5776 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5777 {
5778 falseval = trueval;
5779 trueval = const0_rtx;
5780 code = rcode;
5781 }
5782 }
5783
5784 emit_move_insn (target, trueval);
5785 label = gen_label_rtx ();
5786 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5787 NULL_RTX, label, -1);
5788
5789 emit_move_insn (target, falseval);
5790 emit_label (label);
5791
5792 return target;
5793 }
5794 \f
5795 /* Perform possibly multi-word comparison and conditional jump to LABEL
5796 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5797 now a thin wrapper around do_compare_rtx_and_jump. */
5798
5799 static void
5800 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5801 rtx label)
5802 {
5803 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5804 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5805 NULL_RTX, NULL_RTX, label, -1);
5806 }