gcc/expmed.c

   1 /* Medium-level subroutines: convert bit-field store and extract
   2    and shifts, multiplies and divides to rtl instructions.
   3    Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
   4    1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
   5    2011, 2012
   6    Free Software Foundation, Inc.
   7
   8 This file is part of GCC.
   9
  10 GCC is free software; you can redistribute it and/or modify it under
  11 the terms of the GNU General Public License as published by the Free
  12 Software Foundation; either version 3, or (at your option) any later
  13 version.
  14
  15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
  16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
  17 FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License
  18 for more details.
  19
  20 You should have received a copy of the GNU General Public License
  21 along with GCC; see the file COPYING3.  If not see
  22 <http://www.gnu.org/licenses/>.  */
  23
  24
  25 #include "config.h"
  26 #include "system.h"
  27 #include "coretypes.h"
  28 #include "tm.h"
  29 #include "diagnostic-core.h"
  30 #include "rtl.h"
  31 #include "tree.h"
  32 #include "tm_p.h"
  33 #include "flags.h"
  34 #include "insn-config.h"
  35 #include "expr.h"
  36 #include "optabs.h"
  37 #include "recog.h"
  38 #include "langhooks.h"
  39 #include "df.h"
  40 #include "target.h"
  41 #include "expmed.h"
  42
  43 struct target_expmed default_target_expmed;
  44 #if SWITCHABLE_TARGET
  45 struct target_expmed *this_target_expmed = &default_target_expmed;
  46 #endif
  47
  48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
  49                                    unsigned HOST_WIDE_INT,
  50                                    unsigned HOST_WIDE_INT,
  51                                    unsigned HOST_WIDE_INT,
  52                                    rtx);
  53 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  54                                    unsigned HOST_WIDE_INT,
  55                                    unsigned HOST_WIDE_INT,
  56                                    unsigned HOST_WIDE_INT,
  57                                    rtx);
  58 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
  59                                     unsigned HOST_WIDE_INT,
  60                                     unsigned HOST_WIDE_INT,
  61                                     unsigned HOST_WIDE_INT, rtx, int, bool);
  62 static rtx mask_rtx (enum machine_mode, int, int, int);
  63 static rtx lshift_value (enum machine_mode, rtx, int, int);
  64 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
  65                                     unsigned HOST_WIDE_INT, int);
  66 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
  67 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  68 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
  69
  /* Nonzero if X is zero or a power of two, i.e. if clearing the lowest
     set bit of X leaves nothing behind.  X is expanded twice, so it must
     not have side effects.  */
  #define EXACT_POWER_OF_2_OR_ZERO_P(x) (!((x) & ((x) - 1)))
  72
  /* Default for targets that do not define SLOW_UNALIGNED_ACCESS: both
     arguments are ignored and the result is STRICT_ALIGNMENT.  */
  #if !defined (SLOW_UNALIGNED_ACCESS)
  # define SLOW_UNALIGNED_ACCESS(MODE, ALIGN) STRICT_ALIGNMENT
  #endif
  76
  77
  /* Reduce conditional compilation elsewhere.  For each of the insv, extv
     and extzv patterns that is not already defined, provide stand-ins so
     that code naming the pattern still compiles: the HAVE_ flag is 0, the
     insn code is CODE_FOR_nothing and the generator expands to NULL_RTX
     without looking at its arguments.  */
  #if !defined (HAVE_insv)
  # define HAVE_insv 0
  # define CODE_FOR_insv CODE_FOR_nothing
  # define gen_insv(a,b,c,d) NULL_RTX
  #endif

  #if !defined (HAVE_extv)
  # define HAVE_extv 0
  # define CODE_FOR_extv CODE_FOR_nothing
  # define gen_extv(a,b,c,d) NULL_RTX
  #endif

  #if !defined (HAVE_extzv)
  # define HAVE_extzv 0
  # define CODE_FOR_extzv CODE_FOR_nothing
  # define gen_extzv(a,b,c,d) NULL_RTX
  #endif
  94
  95 struct init_expmed_rtl
  96 {
  97   struct rtx_def reg;           rtunion reg_fld[2];
  98   struct rtx_def plus;  rtunion plus_fld1;
  99   struct rtx_def neg;
 100   struct rtx_def mult;  rtunion mult_fld1;
 101   struct rtx_def sdiv;  rtunion sdiv_fld1;
 102   struct rtx_def udiv;  rtunion udiv_fld1;
 103   struct rtx_def sdiv_32;       rtunion sdiv_32_fld1;
 104   struct rtx_def smod_32;       rtunion smod_32_fld1;
 105   struct rtx_def wide_mult;     rtunion wide_mult_fld1;
 106   struct rtx_def wide_lshr;     rtunion wide_lshr_fld1;
 107   struct rtx_def wide_trunc;
 108   struct rtx_def shift; rtunion shift_fld1;
 109   struct rtx_def shift_mult;    rtunion shift_mult_fld1;
 110   struct rtx_def shift_add;     rtunion shift_add_fld1;
 111   struct rtx_def shift_sub0;    rtunion shift_sub0_fld1;
 112   struct rtx_def shift_sub1;    rtunion shift_sub1_fld1;
 113   struct rtx_def zext;
 114   struct rtx_def trunc;
 115
 116   rtx pow2[MAX_BITS_PER_WORD];
 117   rtx cint[MAX_BITS_PER_WORD];
 118 };
 119
 120 static void
 121 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
 122                       enum machine_mode from_mode, bool speed)
 123 {
 124   int to_size, from_size;
 125   rtx which;
 126
 127   /* We're given no information about the true size of a partial integer,
 128      only the size of the "full" integer it requires for storage.  For
 129      comparison purposes here, reduce the bit size by one in that case.  */
 130   to_size = (GET_MODE_BITSIZE (to_mode)
 131              - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
 132   from_size = (GET_MODE_BITSIZE (from_mode)
 133                - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
 134
 135   /* Assume cost of zero-extend and sign-extend is the same.  */
 136   which = (to_size < from_size ? &all->trunc : &all->zext);
 137
 138   PUT_MODE (&all->reg, from_mode);
 139   set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
 140 }
 141
 142 static void
 143 init_expmed_one_mode (struct init_expmed_rtl *all,
 144                       enum machine_mode mode, int speed)
 145 {
 146   int m, n, mode_bitsize;
 147   enum machine_mode mode_from;
 148
 149   mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
 150
 151   PUT_MODE (&all->reg, mode);
 152   PUT_MODE (&all->plus, mode);
 153   PUT_MODE (&all->neg, mode);
 154   PUT_MODE (&all->mult, mode);
 155   PUT_MODE (&all->sdiv, mode);
 156   PUT_MODE (&all->udiv, mode);
 157   PUT_MODE (&all->sdiv_32, mode);
 158   PUT_MODE (&all->smod_32, mode);
 159   PUT_MODE (&all->wide_trunc, mode);
 160   PUT_MODE (&all->shift, mode);
 161   PUT_MODE (&all->shift_mult, mode);
 162   PUT_MODE (&all->shift_add, mode);
 163   PUT_MODE (&all->shift_sub0, mode);
 164   PUT_MODE (&all->shift_sub1, mode);
 165   PUT_MODE (&all->zext, mode);
 166   PUT_MODE (&all->trunc, mode);
 167
 168   set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
 169   set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
 170   set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
 171   set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
 172   set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
 173
 174   set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
 175                                      <= 2 * add_cost (speed, mode)));
 176   set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
 177                                      <= 4 * add_cost (speed, mode)));
 178
 179   set_shift_cost (speed, mode, 0, 0);
 180   {
 181     int cost = add_cost (speed, mode);
 182     set_shiftadd_cost (speed, mode, 0, cost);
 183     set_shiftsub0_cost (speed, mode, 0, cost);
 184     set_shiftsub1_cost (speed, mode, 0, cost);
 185   }
 186
 187   n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
 188   for (m = 1; m < n; m++)
 189     {
 190       XEXP (&all->shift, 1) = all->cint[m];
 191       XEXP (&all->shift_mult, 1) = all->pow2[m];
 192
 193       set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
 194       set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
 195       set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
 196       set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
 197     }
 198
 199   if (SCALAR_INT_MODE_P (mode))
 200     {
 201       for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
 202            mode_from = (enum machine_mode)(mode_from + 1))
 203         init_expmed_one_conv (all, mode, mode_from, speed);
 204     }
 205   if (GET_MODE_CLASS (mode) == MODE_INT)
 206     {
 207       enum machine_mode  wider_mode = GET_MODE_WIDER_MODE (mode);
 208       if (wider_mode != VOIDmode)
 209         {
 210           PUT_MODE (&all->zext, wider_mode);
 211           PUT_MODE (&all->wide_mult, wider_mode);
 212           PUT_MODE (&all->wide_lshr, wider_mode);
 213           XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
 214
 215           set_mul_widen_cost (speed, wider_mode,
 216                               set_src_cost (&all->wide_mult, speed));
 217           set_mul_highpart_cost (speed, mode,
 218                                  set_src_cost (&all->wide_trunc, speed));
 219         }
 220     }
 221 }
 222
 223 void
 224 init_expmed (void)
 225 {
 226   struct init_expmed_rtl all;
 227   enum machine_mode mode;
 228   int m, speed;
 229
 230   memset (&all, 0, sizeof all);
 231   for (m = 1; m < MAX_BITS_PER_WORD; m++)
 232     {
 233       all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
 234       all.cint[m] = GEN_INT (m);
 235     }
 236
 237   PUT_CODE (&all.reg, REG);
 238   /* Avoid using hard regs in ways which may be unsupported.  */
 239   SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
 240
 241   PUT_CODE (&all.plus, PLUS);
 242   XEXP (&all.plus, 0) = &all.reg;
 243   XEXP (&all.plus, 1) = &all.reg;
 244
 245   PUT_CODE (&all.neg, NEG);
 246   XEXP (&all.neg, 0) = &all.reg;
 247
 248   PUT_CODE (&all.mult, MULT);
 249   XEXP (&all.mult, 0) = &all.reg;
 250   XEXP (&all.mult, 1) = &all.reg;
 251
 252   PUT_CODE (&all.sdiv, DIV);
 253   XEXP (&all.sdiv, 0) = &all.reg;
 254   XEXP (&all.sdiv, 1) = &all.reg;
 255
 256   PUT_CODE (&all.udiv, UDIV);
 257   XEXP (&all.udiv, 0) = &all.reg;
 258   XEXP (&all.udiv, 1) = &all.reg;
 259
 260   PUT_CODE (&all.sdiv_32, DIV);
 261   XEXP (&all.sdiv_32, 0) = &all.reg;
 262   XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
 263
 264   PUT_CODE (&all.smod_32, MOD);
 265   XEXP (&all.smod_32, 0) = &all.reg;
 266   XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
 267
 268   PUT_CODE (&all.zext, ZERO_EXTEND);
 269   XEXP (&all.zext, 0) = &all.reg;
 270
 271   PUT_CODE (&all.wide_mult, MULT);
 272   XEXP (&all.wide_mult, 0) = &all.zext;
 273   XEXP (&all.wide_mult, 1) = &all.zext;
 274
 275   PUT_CODE (&all.wide_lshr, LSHIFTRT);
 276   XEXP (&all.wide_lshr, 0) = &all.wide_mult;
 277
 278   PUT_CODE (&all.wide_trunc, TRUNCATE);
 279   XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
 280
 281   PUT_CODE (&all.shift, ASHIFT);
 282   XEXP (&all.shift, 0) = &all.reg;
 283
 284   PUT_CODE (&all.shift_mult, MULT);
 285   XEXP (&all.shift_mult, 0) = &all.reg;
 286
 287   PUT_CODE (&all.shift_add, PLUS);
 288   XEXP (&all.shift_add, 0) = &all.shift_mult;
 289   XEXP (&all.shift_add, 1) = &all.reg;
 290
 291   PUT_CODE (&all.shift_sub0, MINUS);
 292   XEXP (&all.shift_sub0, 0) = &all.shift_mult;
 293   XEXP (&all.shift_sub0, 1) = &all.reg;
 294
 295   PUT_CODE (&all.shift_sub1, MINUS);
 296   XEXP (&all.shift_sub1, 0) = &all.reg;
 297   XEXP (&all.shift_sub1, 1) = &all.shift_mult;
 298
 299   PUT_CODE (&all.trunc, TRUNCATE);
 300   XEXP (&all.trunc, 0) = &all.reg;
 301
 302   for (speed = 0; speed < 2; speed++)
 303     {
 304       crtl->maybe_hot_insn_p = speed;
 305       set_zero_cost (speed, set_src_cost (const0_rtx, speed));
 306
 307       for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
 308            mode = (enum machine_mode)(mode + 1))
 309         init_expmed_one_mode (&all, mode, speed);
 310
 311       if (MIN_MODE_PARTIAL_INT != VOIDmode)
 312         for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
 313              mode = (enum machine_mode)(mode + 1))
 314           init_expmed_one_mode (&all, mode, speed);
 315
 316       if (MIN_MODE_VECTOR_INT != VOIDmode)
 317         for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
 318              mode = (enum machine_mode)(mode + 1))
 319           init_expmed_one_mode (&all, mode, speed);
 320     }
 321
 322   if (alg_hash_used_p ())
 323     {
 324       struct alg_hash_entry *p = alg_hash_entry_ptr (0);
 325       memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
 326     }
 327   else
 328     set_alg_hash_used_p (true);
 329   default_rtl_profile ();
 330 }
 331
 332 /* Return an rtx representing minus the value of X.
 333    MODE is the intended mode of the result,
 334    useful if X is a CONST_INT.  */
 335
 336 rtx
 337 negate_rtx (enum machine_mode mode, rtx x)
 338 {
 339   rtx result = simplify_unary_operation (NEG, mode, x, mode);
 340
 341   if (result == 0)
 342     result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
 343
 344   return result;
 345 }
 346
 347 /* Report on the availability of insv/extv/extzv and the desired mode
 348    of each of their operands.  Returns MAX_MACHINE_MODE if HAVE_foo
 349    is false; else the mode of the specified operand.  If OPNO is -1,
 350    all the caller cares about is whether the insn is available.  */
 351 enum machine_mode
 352 mode_for_extraction (enum extraction_pattern pattern, int opno)
 353 {
 354   const struct insn_data_d *data;
 355
 356   switch (pattern)
 357     {
 358     case EP_insv:
 359       if (HAVE_insv)
 360         {
 361           data = &insn_data[CODE_FOR_insv];
 362           break;
 363         }
 364       return MAX_MACHINE_MODE;
 365
 366     case EP_extv:
 367       if (HAVE_extv)
 368         {
 369           data = &insn_data[CODE_FOR_extv];
 370           break;
 371         }
 372       return MAX_MACHINE_MODE;
 373
 374     case EP_extzv:
 375       if (HAVE_extzv)
 376         {
 377           data = &insn_data[CODE_FOR_extzv];
 378           break;
 379         }
 380       return MAX_MACHINE_MODE;
 381
 382     default:
 383       gcc_unreachable ();
 384     }
 385
 386   if (opno == -1)
 387     return VOIDmode;
 388
 389   /* Everyone who uses this function used to follow it with
 390      if (result == VOIDmode) result = word_mode; */
 391   if (data->operand[opno].mode == VOIDmode)
 392     return word_mode;
 393   return data->operand[opno].mode;
 394 }
 395
 396 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
 397    a structure of mode STRUCT_MODE represents a lowpart subreg.   The subreg
 398    offset is then BITNUM / BITS_PER_UNIT.  */
 399
 400 static bool
 401 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
 402                      unsigned HOST_WIDE_INT bitsize,
 403                      enum machine_mode struct_mode)
 404 {
 405   if (BYTES_BIG_ENDIAN)
 406     return (bitnum % BITS_PER_UNIT
 407             && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
 408                 || (bitnum + bitsize) % BITS_PER_WORD == 0));
 409   else
 410     return bitnum % BITS_PER_WORD == 0;
 411 }
 412 \f
 413 /* A subroutine of store_bit_field, with the same arguments.  Return true
 414    if the operation could be implemented.
 415
 416    If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
 417    no other way of implementing the operation.  If FALLBACK_P is false,
 418    return false instead.  */
 419
 420 static bool
 421 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 422                    unsigned HOST_WIDE_INT bitnum,
 423                    unsigned HOST_WIDE_INT bitregion_start,
 424                    unsigned HOST_WIDE_INT bitregion_end,
 425                    enum machine_mode fieldmode,
 426                    rtx value, bool fallback_p)
 427 {
 428   rtx op0 = str_rtx;
 429   rtx orig_value;
 430
 431   while (GET_CODE (op0) == SUBREG)
 432     {
 433       /* The following line once was done only if WORDS_BIG_ENDIAN,
 434          but I think that is a mistake.  WORDS_BIG_ENDIAN is
 435          meaningful at a much higher level; when structures are copied
 436          between memory and regs, the higher-numbered regs
 437          always get higher addresses.  */
 438       int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
 439       int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
 440       int byte_offset = 0;
 441
 442       /* Paradoxical subregs need special handling on big endian machines.  */
 443       if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
 444         {
 445           int difference = inner_mode_size - outer_mode_size;
 446
 447           if (WORDS_BIG_ENDIAN)
 448             byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
 449           if (BYTES_BIG_ENDIAN)
 450             byte_offset += difference % UNITS_PER_WORD;
 451         }
 452       else
 453         byte_offset = SUBREG_BYTE (op0);
 454
 455       bitnum += byte_offset * BITS_PER_UNIT;
 456       op0 = SUBREG_REG (op0);
 457     }
 458
 459   /* No action is needed if the target is a register and if the field
 460      lies completely outside that register.  This can occur if the source
 461      code contains an out-of-bounds access to a small array.  */
 462   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
 463     return true;
 464
 465   /* Use vec_set patterns for inserting parts of vectors whenever
 466      available.  */
 467   if (VECTOR_MODE_P (GET_MODE (op0))
 468       && !MEM_P (op0)
 469       && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
 470       && fieldmode == GET_MODE_INNER (GET_MODE (op0))
 471       && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
 472       && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
 473     {
 474       struct expand_operand ops[3];
 475       enum machine_mode outermode = GET_MODE (op0);
 476       enum machine_mode innermode = GET_MODE_INNER (outermode);
 477       enum insn_code icode = optab_handler (vec_set_optab, outermode);
 478       int pos = bitnum / GET_MODE_BITSIZE (innermode);
 479
 480       create_fixed_operand (&ops[0], op0);
 481       create_input_operand (&ops[1], value, innermode);
 482       create_integer_operand (&ops[2], pos);
 483       if (maybe_expand_insn (icode, 3, ops))
 484         return true;
 485     }
 486
 487   /* If the target is a register, overwriting the entire object, or storing
 488      a full-word or multi-word field can be done with just a SUBREG.  */
 489   if (!MEM_P (op0)
 490       && bitsize == GET_MODE_BITSIZE (fieldmode)
 491       && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
 492           || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
 493     {
 494       /* Use the subreg machinery either to narrow OP0 to the required
 495          words or to cope with mode punning between equal-sized modes.  */
 496       rtx sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
 497                                      bitnum / BITS_PER_UNIT);
 498       if (sub)
 499         {
 500           emit_move_insn (sub, value);
 501           return true;
 502         }
 503     }
 504
 505   /* If the target is memory, storing any naturally aligned field can be
 506      done with a simple store.  For targets that support fast unaligned
 507      memory, any naturally sized, unit aligned field can be done directly.  */
 508   if (MEM_P (op0)
 509       && bitnum % BITS_PER_UNIT == 0
 510       && bitsize == GET_MODE_BITSIZE (fieldmode)
 511       && (!SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
 512           || (bitnum % bitsize == 0
 513               && MEM_ALIGN (op0) % bitsize == 0)))
 514     {
 515       op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
 516       emit_move_insn (op0, value);
 517       return true;
 518     }
 519
 520   /* Make sure we are playing with integral modes.  Pun with subregs
 521      if we aren't.  This must come after the entire register case above,
 522      since that case is valid for any mode.  The following cases are only
 523      valid for integral modes.  */
 524   {
 525     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
 526     if (imode != GET_MODE (op0))
 527       {
 528         if (MEM_P (op0))
 529           op0 = adjust_bitfield_address (op0, imode, 0);
 530         else
 531           {
 532             gcc_assert (imode != BLKmode);
 533             op0 = gen_lowpart (imode, op0);
 534           }
 535       }
 536   }
 537
 538   /* Storing an lsb-aligned field in a register
 539      can be done with a movstrict instruction.  */
 540
 541   if (!MEM_P (op0)
 542       && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
 543       && bitsize == GET_MODE_BITSIZE (fieldmode)
 544       && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
 545     {
 546       struct expand_operand ops[2];
 547       enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
 548       rtx arg0 = op0;
 549       unsigned HOST_WIDE_INT subreg_off;
 550
 551       if (GET_CODE (arg0) == SUBREG)
 552         {
 553           /* Else we've got some float mode source being extracted into
 554              a different float mode destination -- this combination of
 555              subregs results in Severe Tire Damage.  */
 556           gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
 557                       || GET_MODE_CLASS (fieldmode) == MODE_INT
 558                       || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
 559           arg0 = SUBREG_REG (arg0);
 560         }
 561
 562       subreg_off = bitnum / BITS_PER_UNIT;
 563       if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
 564         {
 565           arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
 566
 567           create_fixed_operand (&ops[0], arg0);
 568           /* Shrink the source operand to FIELDMODE.  */
 569           create_convert_operand_to (&ops[1], value, fieldmode, false);
 570           if (maybe_expand_insn (icode, 2, ops))
 571             return true;
 572         }
 573     }
 574
 575   /* Handle fields bigger than a word.  */
 576
 577   if (bitsize > BITS_PER_WORD)
 578     {
 579       /* Here we transfer the words of the field
 580          in the order least significant first.
 581          This is because the most significant word is the one which may
 582          be less than full.
 583          However, only do that if the value is not BLKmode.  */
 584
 585       unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
 586       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
 587       unsigned int i;
 588       rtx last;
 589
 590       /* This is the mode we must force value to, so that there will be enough
 591          subwords to extract.  Note that fieldmode will often (always?) be
 592          VOIDmode, because that is what store_field uses to indicate that this
 593          is a bit field, but passing VOIDmode to operand_subword_force
 594          is not allowed.  */
 595       fieldmode = GET_MODE (value);
 596       if (fieldmode == VOIDmode)
 597         fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
 598
 599       last = get_last_insn ();
 600       for (i = 0; i < nwords; i++)
 601         {
 602           /* If I is 0, use the low-order word in both field and target;
 603              if I is 1, use the next to lowest word; and so on.  */
 604           unsigned int wordnum = (backwards
 605                                   ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
 606                                   - i - 1
 607                                   : i);
 608           unsigned int bit_offset = (backwards
 609                                      ? MAX ((int) bitsize - ((int) i + 1)
 610                                             * BITS_PER_WORD,
 611                                             0)
 612                                      : (int) i * BITS_PER_WORD);
 613           rtx value_word = operand_subword_force (value, wordnum, fieldmode);
 614           unsigned HOST_WIDE_INT new_bitsize =
 615             MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
 616
 617           /* If the remaining chunk doesn't have full wordsize we have
 618              to make sure that for big endian machines the higher order
 619              bits are used.  */
 620           if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
 621             value_word = simplify_expand_binop (word_mode, lshr_optab,
 622                                                 value_word,
 623                                                 GEN_INT (BITS_PER_WORD
 624                                                          - new_bitsize),
 625                                                 NULL_RTX, true,
 626                                                 OPTAB_LIB_WIDEN);
 627
 628           if (!store_bit_field_1 (op0, new_bitsize,
 629                                   bitnum + bit_offset,
 630                                   bitregion_start, bitregion_end,
 631                                   word_mode,
 632                                   value_word, fallback_p))
 633             {
 634               delete_insns_since (last);
 635               return false;
 636             }
 637         }
 638       return true;
 639     }
 640
 641   /* If VALUE has a floating-point or complex mode, access it as an
 642      integer of the corresponding size.  This can occur on a machine
 643      with 64 bit registers that uses SFmode for float.  It can also
 644      occur for unaligned float or complex fields.  */
 645   orig_value = value;
 646   if (GET_MODE (value) != VOIDmode
 647       && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
 648       && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
 649     {
 650       value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
 651       emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
 652     }
 653
 654   /* If OP0 is a multi-word register, narrow it to the affected word.
 655      If the region spans two words, defer to store_split_bit_field.  */
 656   if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
 657     {
 658       op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
 659                                  bitnum / BITS_PER_WORD * UNITS_PER_WORD);
 660       gcc_assert (op0);
 661       bitnum %= BITS_PER_WORD;
 662       if (bitnum + bitsize > BITS_PER_WORD)
 663         {
 664           if (!fallback_p)
 665             return false;
 666
 667           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 668                                  bitregion_end, value);
 669           return true;
 670         }
 671     }
 672
 673   /* From here on we can assume that the field to be stored in fits
 674      within a word.  If the destination is a register, it too fits
 675      in a word.  */
 676
 677   enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
 678   if (HAVE_insv
 679       && GET_MODE (value) != BLKmode
 680       && bitsize > 0
 681       && GET_MODE_BITSIZE (op_mode) >= bitsize
 682       /* Do not use insv for volatile bitfields when
 683          -fstrict-volatile-bitfields is in effect.  */
 684       && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
 685            && flag_strict_volatile_bitfields > 0)
 686       /* Do not use insv if the bit region is restricted and
 687          op_mode integer at offset doesn't fit into the
 688          restricted region.  */
 689       && !(MEM_P (op0) && bitregion_end
 690            && bitnum - (bitnum % BITS_PER_UNIT) + GET_MODE_BITSIZE (op_mode)
 691               > bitregion_end + 1))
 692     {
 693       struct expand_operand ops[4];
 694       unsigned HOST_WIDE_INT bitpos = bitnum;
 695       rtx value1;
 696       rtx xop0 = op0;
 697       rtx last = get_last_insn ();
 698       bool copy_back = false;
 699
 700       unsigned int unit = GET_MODE_BITSIZE (op_mode);
 701       if (MEM_P (xop0))
 702         {
 703           /* Get a reference to the first byte of the field.  */
 704           xop0 = adjust_bitfield_address (xop0, byte_mode,
 705                                           bitpos / BITS_PER_UNIT);
 706           bitpos %= BITS_PER_UNIT;
 707         }
 708       else
 709         {
 710           /* Convert from counting within OP0 to counting in OP_MODE.  */
 711           if (BYTES_BIG_ENDIAN)
 712             bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
 713         }
 714
 715       /* If xop0 is a register, we need it in OP_MODE
 716          to make it acceptable to the format of insv.  */
 717       if (GET_CODE (xop0) == SUBREG)
 718         /* We can't just change the mode, because this might clobber op0,
 719            and we will need the original value of op0 if insv fails.  */
 720         xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
 721       if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
 722         xop0 = gen_lowpart_SUBREG (op_mode, xop0);
 723
 724       /* If the destination is a paradoxical subreg such that we need a
 725          truncate to the inner mode, perform the insertion on a temporary and
 726          truncate the result to the original destination.  Note that we can't
 727          just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
 728          X) 0)) is (reg:N X).  */
 729       if (GET_CODE (xop0) == SUBREG
 730           && REG_P (SUBREG_REG (xop0))
 731           && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
 732                                               op_mode)))
 733         {
 734           rtx tem = gen_reg_rtx (op_mode);
 735           emit_move_insn (tem, xop0);
 736           xop0 = tem;
 737           copy_back = true;
 738         }
 739
 740       /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
 741          "backwards" from the size of the unit we are inserting into.
 742          Otherwise, we count bits from the most significant on a
 743          BYTES/BITS_BIG_ENDIAN machine.  */
 744
 745       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
 746         bitpos = unit - bitsize - bitpos;
 747
 748       /* Convert VALUE to op_mode (which insv insn wants) in VALUE1.  */
 749       value1 = value;
 750       if (GET_MODE (value) != op_mode)
 751         {
 752           if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
 753             {
 754               /* Optimization: Don't bother really extending VALUE
 755                  if it has all the bits we will actually use.  However,
 756                  if we must narrow it, be sure we do it correctly.  */
 757
 758               if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
 759                 {
 760                   rtx tmp;
 761
 762                   tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
 763                   if (! tmp)
 764                     tmp = simplify_gen_subreg (op_mode,
 765                                                force_reg (GET_MODE (value),
 766                                                           value1),
 767                                                GET_MODE (value), 0);
 768                   value1 = tmp;
 769                 }
 770               else
 771                 value1 = gen_lowpart (op_mode, value1);
 772             }
 773           else if (CONST_INT_P (value))
 774             value1 = gen_int_mode (INTVAL (value), op_mode);
 775           else
 776             /* Parse phase is supposed to make VALUE's data type
 777                match that of the component reference, which is a type
 778                at least as wide as the field; so VALUE should have
 779                a mode that corresponds to that type.  */
 780             gcc_assert (CONSTANT_P (value));
 781         }
 782
 783       create_fixed_operand (&ops[0], xop0);
 784       create_integer_operand (&ops[1], bitsize);
 785       create_integer_operand (&ops[2], bitpos);
 786       create_input_operand (&ops[3], value1, op_mode);
 787       if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
 788         {
 789           if (copy_back)
 790             convert_move (op0, xop0, true);
 791           return true;
 792         }
 793       delete_insns_since (last);
 794     }
 795
 796   /* If OP0 is a memory, try copying it to a register and seeing if a
 797      cheap register alternative is available.  */
 798   if (HAVE_insv && MEM_P (op0))
 799     {
 800       enum machine_mode bestmode;
 801       unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
 802
 803       if (bitregion_end)
 804         maxbits = bitregion_end - bitregion_start + 1;
 805
 806       /* Get the mode to use for inserting into this field.  If OP0 is
 807          BLKmode, get the smallest mode consistent with the alignment. If
 808          OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
 809          mode. Otherwise, use the smallest mode containing the field.  */
 810
 811       if (GET_MODE (op0) == BLKmode
 812           || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
 813           || (op_mode != MAX_MACHINE_MODE
 814               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
 815         bestmode = get_best_mode (bitsize, bitnum,
 816                                   bitregion_start, bitregion_end,
 817                                   MEM_ALIGN (op0),
 818                                   (op_mode == MAX_MACHINE_MODE
 819                                    ? VOIDmode : op_mode),
 820                                   MEM_VOLATILE_P (op0));
 821       else
 822         bestmode = GET_MODE (op0);
 823
 824       if (bestmode != VOIDmode
 825           && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
 826           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
 827                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
 828         {
 829           rtx last, tempreg, xop0;
 830           unsigned int unit;
 831           unsigned HOST_WIDE_INT offset, bitpos;
 832
 833           last = get_last_insn ();
 834
 835           /* Adjust address to point to the containing unit of
 836              that mode.  Compute the offset as a multiple of this unit,
 837              counting in bytes.  */
 838           unit = GET_MODE_BITSIZE (bestmode);
 839           offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
 840           bitpos = bitnum % unit;
 841           xop0 = adjust_bitfield_address (op0, bestmode, offset);
 842
 843           /* Fetch that unit, store the bitfield in it, then store
 844              the unit.  */
 845           tempreg = copy_to_reg (xop0);
 846           if (store_bit_field_1 (tempreg, bitsize, bitpos,
 847                                  bitregion_start, bitregion_end,
 848                                  fieldmode, orig_value, false))
 849             {
 850               emit_move_insn (xop0, tempreg);
 851               return true;
 852             }
 853           delete_insns_since (last);
 854         }
 855     }
 856
 857   if (!fallback_p)
 858     return false;
 859
 860   store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
 861                          bitregion_end, value);
 862   return true;
 863 }
 864
 865 /* Generate code to store value from rtx VALUE
 866    into a bit-field within structure STR_RTX
 867    containing BITSIZE bits starting at bit BITNUM.
 868
 869    BITREGION_START is bitpos of the first bitfield in this region.
 870    BITREGION_END is the bitpos of the ending bitfield in this region.
 871    These two fields are 0, if the C++ memory model does not apply,
 872    or we are not interested in keeping track of bitfield regions.
 873
 874    FIELDMODE is the machine-mode of the FIELD_DECL node for this field.

        This is the entry point wrapping store_bit_field_1: it calls it
        with its last argument true and treats a false return as
        unreachable.  When STR_RTX is a MEM and BITREGION_START is
        nonzero, the address is first moved to the start of the bit
        region, and BITNUM, BITREGION_START and BITREGION_END are rebased
        to match before the call.  */
 875
 876 void
 877 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
 878                  unsigned HOST_WIDE_INT bitnum,
 879                  unsigned HOST_WIDE_INT bitregion_start,
 880                  unsigned HOST_WIDE_INT bitregion_end,
 881                  enum machine_mode fieldmode,
 882                  rtx value)
 883 {
 884   /* Under the C++0x memory model, we must not touch bits outside the
 885      bit region.  Adjust the address to start at the beginning of the
 886      bit region.  */
 887   if (MEM_P (str_rtx) && bitregion_start > 0)
 888     {
 889       enum machine_mode bestmode;
 890       enum machine_mode op_mode;
 891       unsigned HOST_WIDE_INT offset;
 892
           /* get_best_mode is handed VOIDmode instead of MAX_MACHINE_MODE
              when mode_for_extraction returns the latter; the memory path
              of store_bit_field_1 applies the same mapping.  */
 893       op_mode = mode_for_extraction (EP_insv, 3);
 894       if (op_mode == MAX_MACHINE_MODE)
 895         op_mode = VOIDmode;
 896
           /* OFFSET below is a whole number of BITS_PER_UNIT-sized units,
              so the region has to start on such a boundary.  */
 897       gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
 898
 899       offset = bitregion_start / BITS_PER_UNIT;
           /* Rebase every bit position so that the region starts at bit 0
              of the adjusted address.  */
 900       bitnum -= bitregion_start;
 901       bitregion_end -= bitregion_start;
 902       bitregion_start = 0;
 903       bestmode = get_best_mode (bitsize, bitnum,
 904                                 bitregion_start, bitregion_end,
 905                                 MEM_ALIGN (str_rtx),
 906                                 op_mode,
 907                                 MEM_VOLATILE_P (str_rtx));
           /* NOTE(review): BESTMODE is used without a VOIDmode check,
              whereas store_bit_field_1 and store_fixed_bit_field both test
              the result of get_best_mode against VOIDmode.  Confirm that
              VOIDmode cannot be returned here or that adjust_address
              copes with it.  */
 908       str_rtx = adjust_address (str_rtx, bestmode, offset);
 909     }
 910
       /* With its last argument true, store_bit_field_1 ends by falling
          back to store_fixed_bit_field and returning true, so a false
          return here would indicate a bug.  */
 911   if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
 912                           bitregion_start, bitregion_end,
 913                           fieldmode, value, true))
 914     gcc_unreachable ();
 915 }
 916 \f
 917 /* Use shifts and boolean operations to store VALUE into a bit field of
 918    width BITSIZE in OP0, starting at bit BITNUM.

        BITREGION_START and BITREGION_END are only consulted when OP0 is a
        MEM: they bound the mode chosen for the access and are handed on
        to get_best_mode and store_split_bit_field.  A BITREGION_END of
        zero leaves the bound at MAX_FIXED_MODE_SIZE.

        If OP0 is a MEM and get_best_mode finds no mode (VOIDmode), the
        work is delegated to store_split_bit_field.  Otherwise OP0 is
        forced into a register, the field's bits are cleared with an AND,
        the shifted VALUE is merged in with an IOR, and the result is
        moved back into OP0.  */
 919
 920 static void
 921 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
 922                        unsigned HOST_WIDE_INT bitnum,
 923                        unsigned HOST_WIDE_INT bitregion_start,
 924                        unsigned HOST_WIDE_INT bitregion_end,
 925                        rtx value)
 926 {
 927   enum machine_mode mode;
 928   rtx temp;
       /* Set only when VALUE is a CONST_INT whose field bits are all zero
          or all one; they let the IOR or the AND below be skipped.  */
 929   int all_zero = 0;
 930   int all_one = 0;
 931
 932   /* There is a case not handled here:
 933      a structure with a known alignment of just a halfword
 934      and a field split across two aligned halfwords within the structure.
 935      Or likewise a structure with a known alignment of just a byte
 936      and a field split across two bytes.
 937      Such cases are not supposed to be able to occur.  */
 938
 939   if (MEM_P (op0))
 940     {
 941       unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
 942
           /* A nonzero BITREGION_END restricts the access to the size of
              the bit region, in bits.  */
 943       if (bitregion_end)
 944         maxbits = bitregion_end - bitregion_start + 1;
 945
 946       /* Get the proper mode to use for this field.  We want a mode that
 947          includes the entire field.  If such a mode would be larger than
 948          a word, we won't be doing the extraction the normal way.
 949          We don't want a mode bigger than the destination.  */
 950
           /* Start from OP0's own mode, replaced by word_mode when that
              mode has a zero bit size or is wider than word_mode.  */
 951       mode = GET_MODE (op0);
 952       if (GET_MODE_BITSIZE (mode) == 0
 953           || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
 954         mode = word_mode;
 955
           /* A volatile MEM whose own mode has a nonzero size no larger
              than MAXBITS keeps that mode when
              flag_strict_volatile_bitfields is positive.  In every other
              case get_best_mode chooses, with the MODE computed above
              passed in (presumably as the widest mode it may return;
              verify against get_best_mode).  */
 956       if (MEM_VOLATILE_P (op0)
 957           && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
 958           && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
 959           && flag_strict_volatile_bitfields > 0)
 960         mode = GET_MODE (op0);
 961       else
 962         mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
 963                               MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
 964
 965       if (mode == VOIDmode)
 966         {
 967           /* The only way this should occur is if the field spans word
 968              boundaries.  */
 969           store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
 970                                  bitregion_end, value);
 971           return;
 972         }
 973
           /* Round BITNUM down to a multiple of MODE's bit size, move the
              address forward by that amount, and keep only the remainder
              in BITNUM.  */
 974       HOST_WIDE_INT bit_offset = bitnum - bitnum % GET_MODE_BITSIZE (mode);
 975       op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
 976       bitnum -= bit_offset;
 977     }
 978
       /* From here on MODE is the mode of OP0 itself (for a MEM, the mode
          just given to it above), and it must be a scalar integer mode.  */
 979   mode = GET_MODE (op0);
 980   gcc_assert (SCALAR_INT_MODE_P (mode));
 981
 982   /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
 983      for invalid input, such as f5 from gcc.dg/pr48335-2.c.  */
 984
 985   if (BYTES_BIG_ENDIAN)
 986     /* BITNUM is the distance between our msb
 987        and that of the containing datum.
 988        Convert it to the distance from the lsb.  */
 989     bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
 990
 991   /* Now BITNUM is always the distance between our lsb
 992      and that of OP0.  */
 993
 994   /* Shift VALUE left by BITNUM bits.  If VALUE is not constant,
 995      we must first convert its mode to MODE.  */
 996
 997   if (CONST_INT_P (value))
 998     {
 999       HOST_WIDE_INT v = INTVAL (value);
1000
           /* Keep only the low BITSIZE bits of the constant.  The mask is
              not built when BITSIZE is not smaller than
              HOST_BITS_PER_WIDE_INT, where the shift count would be out
              of range.  */
1001       if (bitsize < HOST_BITS_PER_WIDE_INT)
1002         v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
1003
1004       if (v == 0)
1005         all_zero = 1;
1006       else if ((bitsize < HOST_BITS_PER_WIDE_INT
1007                 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1008                || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1009         all_one = 1;
1010
1011       value = lshift_value (mode, value, bitnum, bitsize);
1012     }
1013   else
1014     {
           /* Decided before VALUE is converted, because the test looks at
              VALUE's original mode.  No masking is done when that mode is
              exactly BITSIZE bits wide, or when the field ends at the top
              bit of MODE.  */
1015       int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1016                       && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1017
1018       if (GET_MODE (value) != mode)
1019         value = convert_to_mode (mode, value, 1);
1020
1021       if (must_and)
1022         value = expand_binop (mode, and_optab, value,
1023                               mask_rtx (mode, 0, bitsize, 0),
1024                               NULL_RTX, 1, OPTAB_LIB_WIDEN);
1025       if (bitnum > 0)
1026         value = expand_shift (LSHIFT_EXPR, mode, value,
1027                               bitnum, NULL_RTX, 1);
1028     }
1029
1030   /* Now clear the chosen bits in OP0,
1031      except that if VALUE is a constant with every field bit set
          (ALL_ONE) we need not bother.  */
1032   /* We keep the intermediates in registers to allow CSE to combine
1033      consecutive bitfield assignments.  */
1034
1035   temp = force_reg (mode, op0);
1036
1037   if (! all_one)
1038     {
1039       temp = expand_binop (mode, and_optab, temp,
1040                            mask_rtx (mode, bitnum, bitsize, 1),
1041                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1042       temp = force_reg (mode, temp);
1043     }
1044
1045   /* Now logical-or VALUE into OP0, unless it is zero.  */
1046
1047   if (! all_zero)
1048     {
1049       temp = expand_binop (mode, ior_optab, temp, value,
1050                            NULL_RTX, 1, OPTAB_LIB_WIDEN);
1051       temp = force_reg (mode, temp);
1052     }
1053
       /* Write the merged value back unless TEMP is OP0 itself.  The move
          targets a copy_rtx of OP0 rather than OP0 directly.  */
1054   if (op0 != temp)
1055     {
1056       op0 = copy_rtx (op0);
1057       emit_move_insn (op0, temp);
1058     }
1059 }
1060 \f
1061 /* Store a bit field that is split across multiple accessible memory objects.
1062
1063    OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1064    BITSIZE is the field width; BITPOS the position of its first bit
1065    (within the word).
1066    VALUE is the value to store.
        BITREGION_START and BITREGION_END are passed through to
        store_fixed_bit_field; a nonzero BITREGION_END also makes the
        piece size shrink near the end of the region.

        The field is written piece by piece: each loop iteration takes
        the next THISSIZE bits of VALUE and stores them with
        store_fixed_bit_field.
1067
1068    This does not yet handle fields wider than BITS_PER_WORD.  */
1069
1070 static void
1071 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1072                        unsigned HOST_WIDE_INT bitpos,
1073                        unsigned HOST_WIDE_INT bitregion_start,
1074                        unsigned HOST_WIDE_INT bitregion_end,
1075                        rtx value)
1076 {
       /* UNIT is the width in bits of the chunks OP0 is divided into;
          BITSDONE counts the bits of the field already stored.  */
1077   unsigned int unit;
1078   unsigned int bitsdone = 0;
1079
1080   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1081      much at a time.  */
1082   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1083     unit = BITS_PER_WORD;
1084   else
1085     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1086
1087   /* If VALUE is a constant other than a CONST_INT, get it into a register in
1088      WORD_MODE.  If we can do this using gen_lowpart_common, do so.  Note
1089      that VALUE might be a floating-point constant.  */
1090   if (CONSTANT_P (value) && !CONST_INT_P (value))
1091     {
1092       rtx word = gen_lowpart_common (word_mode, value);
1093
1094       if (word && (value != word))
1095         value = word;
1096       else
1097         value = gen_lowpart_common (word_mode,
1098                                     force_reg (GET_MODE (value) != VOIDmode
1099                                                ? GET_MODE (value)
1100                                                : word_mode, value));
1101     }
1102
1103   while (bitsdone < bitsize)
1104     {
1105       unsigned HOST_WIDE_INT thissize;
1106       rtx part, word;
1107       unsigned HOST_WIDE_INT thispos;
1108       unsigned HOST_WIDE_INT offset;
1109
           /* OFFSET is the index of the UNIT-sized chunk holding the next
              bit to store; THISPOS is that bit's position inside it.  */
1110       offset = (bitpos + bitsdone) / unit;
1111       thispos = (bitpos + bitsdone) % unit;
1112
1113       /* When region of bytes we can touch is restricted, decrease
1114          UNIT close to the end of the region as needed.  */
           /* The `continue' restarts the iteration without advancing
              BITSDONE, so OFFSET and THISPOS are recomputed for the halved
              UNIT.  Halving stops once UNIT is down to BITS_PER_UNIT.  */
1115       if (bitregion_end
1116           && unit > BITS_PER_UNIT
1117           && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1118         {
1119           unit = unit / 2;
1120           continue;
1121         }
1122
1123       /* THISSIZE must not overrun a word boundary.  Otherwise,
1124          store_fixed_bit_field will call us again, and we will mutually
1125          recurse forever.  */
1126       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1127       thissize = MIN (thissize, unit - thispos);
1128
           /* NOTE(review): both CONST_INT branches below build the mask
              ((HOST_WIDE_INT) 1 << thissize) - 1.  THISSIZE is bounded
              above only by BITS_PER_WORD and UNIT - THISPOS; confirm it
              can never equal HOST_BITS_PER_WIDE_INT, where that shift
              count would be out of range.  */
1129       if (BYTES_BIG_ENDIAN)
1130         {
1131           int total_bits;
1132
1133           /* We must do an endian conversion exactly the same way as it is
1134              done in extract_bit_field, so that the two calls to
1135              extract_fixed_bit_field will have comparable arguments.  */
1136           if (!MEM_P (value) || GET_MODE (value) == BLKmode)
1137             total_bits = BITS_PER_WORD;
1138           else
1139             total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1140
1141           /* Fetch successively less significant portions.  */
1142           if (CONST_INT_P (value))
1143             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1144                              >> (bitsize - bitsdone - thissize))
1145                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1146           else
1147             /* The args are chosen so that the last part includes the
1148                lsb.  Give extract_bit_field the value it needs (with
1149                endianness compensation) to fetch the piece we want.  */
1150             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1151                                             total_bits - bitsize + bitsdone,
1152                                             NULL_RTX, 1, false);
1153         }
1154       else
1155         {
1156           /* Fetch successively more significant portions.  */
1157           if (CONST_INT_P (value))
1158             part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1159                              >> bitsdone)
1160                             & (((HOST_WIDE_INT) 1 << thissize) - 1));
1161           else
1162             part = extract_fixed_bit_field (word_mode, value, 0, thissize,
1163                                             bitsdone, NULL_RTX, 1, false);
1164         }
1165
1166       /* If OP0 is a register, then handle OFFSET here.
1167
1168          When handling multiword bitfields, extract_bit_field may pass
1169          down a word_mode SUBREG of a larger REG for a bitfield that actually
1170          crosses a word boundary.  Thus, for a SUBREG, we must find
1171          the current word starting from the base register.  */
           /* In both register cases below, an object narrower than a word
              has only word 0: a nonzero word index yields const0_rtx,
              which the test before the store treats as "skip this
              piece".  OFFSET is then cleared because WORD already
              designates the right word.  */
1172       if (GET_CODE (op0) == SUBREG)
1173         {
1174           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1175           enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1176           if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1177             word = word_offset ? const0_rtx : op0;
1178           else
1179             word = operand_subword_force (SUBREG_REG (op0), word_offset,
1180                                           GET_MODE (SUBREG_REG (op0)));
1181           offset = 0;
1182         }
1183       else if (REG_P (op0))
1184         {
1185           enum machine_mode op0_mode = GET_MODE (op0);
1186           if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1187             word = offset ? const0_rtx : op0;
1188           else
1189             word = operand_subword_force (op0, offset, GET_MODE (op0));
1190           offset = 0;
1191         }
1192       else
1193         word = op0;
1194
1195       /* OFFSET is in UNITs, and UNIT is in bits.  If WORD is const0_rtx,
1196          it is just an out-of-bounds access.  Ignore it.  */
1197       if (word != const0_rtx)
1198         store_fixed_bit_field (word, thissize, offset * unit + thispos,
1199                                bitregion_start, bitregion_end, part);
           /* BITSDONE advances even when the piece was skipped above.  */
1200       bitsdone += thissize;
1201     }
1202 }
1203 \f
1204 /* A subroutine of extract_bit_field_1 that converts return value X
1205    to either MODE or TMODE.  MODE, TMODE and UNSIGNEDP are arguments
1206    to extract_bit_field.

        X is returned unchanged when it already has mode TMODE or MODE.
        Otherwise the result always has mode TMODE; MODE plays no further
        part.  UNSIGNEDP is passed on to convert_to_mode.  */
1207
1208 static rtx
1209 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1210                              enum machine_mode tmode, bool unsignedp)
1211 {
1212   if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1213     return x;
1214
1215   /* If TMODE is not a scalar integer mode, first convert X to the
1216      integer mode with TMODE's bit size, force it into a register,
1217      and then reinterpret that register as TMODE with gen_lowpart.  */
1218   if (!SCALAR_INT_MODE_P (tmode))
1219     {
1220       enum machine_mode smode;
1221
1222       smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1223       x = convert_to_mode (smode, x, unsignedp);
1224       x = force_reg (smode, x);
1225       return gen_lowpart (tmode, x);
1226     }
1227
       /* TMODE is a scalar integer mode: an ordinary mode conversion.  */
1228   return convert_to_mode (tmode, x, unsignedp);
1229 }
1230
1231 /* A subroutine of extract_bit_field, with the same arguments.
1232    If FALLBACK_P is true, fall back to extract_fixed_bit_field
1233    if we can find no other means of implementing the operation.
1234    If FALLBACK_P is false, return NULL instead.  */
1235
1236 static rtx
1237 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1238                      unsigned HOST_WIDE_INT bitnum,
1239                      int unsignedp, bool packedp, rtx target,
1240                      enum machine_mode mode, enum machine_mode tmode,
1241                      bool fallback_p)
1242 {
1243   unsigned int unit
1244     = (MEM_P (str_rtx)) ? BITS_PER_UNIT : BITS_PER_WORD;
1245   unsigned HOST_WIDE_INT offset, bitpos;
1246   rtx op0 = str_rtx;
1247   enum machine_mode int_mode;
1248   enum machine_mode ext_mode;
1249   enum machine_mode mode1;
1250   int byte_offset;
1251
1252   if (tmode == VOIDmode)
1253     tmode = mode;
1254
1255   while (GET_CODE (op0) == SUBREG)
1256     {
1257       bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1258       op0 = SUBREG_REG (op0);
1259     }
1260
1261   /* If we have an out-of-bounds access to a register, just return an
1262      uninitialized register of the required mode.  This can occur if the
1263      source code contains an out-of-bounds access to a small array.  */
1264   if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1265     return gen_reg_rtx (tmode);
1266
1267   if (REG_P (op0)
1268       && mode == GET_MODE (op0)
1269       && bitnum == 0
1270       && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1271     {
1272       /* We're trying to extract a full register from itself.  */
1273       return op0;
1274     }
1275
1276   /* See if we can get a better vector mode before extracting.  */
1277   if (VECTOR_MODE_P (GET_MODE (op0))
1278       && !MEM_P (op0)
1279       && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1280     {
1281       enum machine_mode new_mode;
1282
1283       if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1284         new_mode = MIN_MODE_VECTOR_FLOAT;
1285       else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1286         new_mode = MIN_MODE_VECTOR_FRACT;
1287       else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1288         new_mode = MIN_MODE_VECTOR_UFRACT;
1289       else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1290         new_mode = MIN_MODE_VECTOR_ACCUM;
1291       else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1292         new_mode = MIN_MODE_VECTOR_UACCUM;
1293       else
1294         new_mode = MIN_MODE_VECTOR_INT;
1295
1296       for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1297         if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1298             && targetm.vector_mode_supported_p (new_mode))
1299           break;
1300       if (new_mode != VOIDmode)
1301         op0 = gen_lowpart (new_mode, op0);
1302     }
1303
1304   /* Use vec_extract patterns for extracting parts of vectors whenever
1305      available.  */
1306   if (VECTOR_MODE_P (GET_MODE (op0))
1307       && !MEM_P (op0)
1308       && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1309       && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1310           == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1311     {
1312       struct expand_operand ops[3];
1313       enum machine_mode outermode = GET_MODE (op0);
1314       enum machine_mode innermode = GET_MODE_INNER (outermode);
1315       enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1316       unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1317
1318       create_output_operand (&ops[0], target, innermode);
1319       create_input_operand (&ops[1], op0, outermode);
1320       create_integer_operand (&ops[2], pos);
1321       if (maybe_expand_insn (icode, 3, ops))
1322         {
1323           target = ops[0].value;
1324           if (GET_MODE (target) != mode)
1325             return gen_lowpart (tmode, target);
1326           return target;
1327         }
1328     }
1329
1330   /* Make sure we are playing with integral modes.  Pun with subregs
1331      if we aren't.  */
1332   {
1333     enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1334     if (imode != GET_MODE (op0))
1335       {
1336         if (MEM_P (op0))
1337           op0 = adjust_bitfield_address (op0, imode, 0);
1338         else if (imode != BLKmode)
1339           {
1340             op0 = gen_lowpart (imode, op0);
1341
1342             /* If we got a SUBREG, force it into a register since we
1343                aren't going to be able to do another SUBREG on it.  */
1344             if (GET_CODE (op0) == SUBREG)
1345               op0 = force_reg (imode, op0);
1346           }
1347         else if (REG_P (op0))
1348           {
1349             rtx reg, subreg;
1350             imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1351                                             MODE_INT);
1352             reg = gen_reg_rtx (imode);
1353             subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1354             emit_move_insn (subreg, op0);
1355             op0 = reg;
1356             bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1357           }
1358         else
1359           {
1360             rtx mem = assign_stack_temp (GET_MODE (op0),
1361                                          GET_MODE_SIZE (GET_MODE (op0)));
1362             emit_move_insn (mem, op0);
1363             op0 = adjust_bitfield_address (mem, BLKmode, 0);
1364           }
1365       }
1366   }
1367
1368   /* Extraction of a full-word or multi-word value from a structure
1369      in a register or aligned memory can be done with just a SUBREG.
1370      A subword value in the least significant part of a register
1371      can also be extracted with a SUBREG.  For this, we need the
1372      byte offset of the value in op0.  */
1373
1374   bitpos = bitnum % unit;
1375   offset = bitnum / unit;
1376   byte_offset = bitpos / BITS_PER_UNIT + offset * UNITS_PER_WORD;
1377
1378   /* If OP0 is a register, BITPOS must count within a word.
1379      But as we have it, it counts within whatever size OP0 now has.
1380      On a bigendian machine, these are not the same, so convert.  */
1381   if (BYTES_BIG_ENDIAN
1382       && !MEM_P (op0)
1383       && unit > GET_MODE_BITSIZE (GET_MODE (op0)))
1384     bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1385
1386   /* ??? We currently assume TARGET is at least as big as BITSIZE.
1387      If that's wrong, the solution is to test for it and set TARGET to 0
1388      if needed.  */
1389
1390   /* Only scalar integer modes can be converted via subregs.  There is an
1391      additional problem for FP modes here in that they can have a precision
1392      which is different from the size.  mode_for_size uses precision, but
1393      we want a mode based on the size, so we must avoid calling it for FP
1394      modes.  */
1395   mode1  = (SCALAR_INT_MODE_P (tmode)
1396             ? mode_for_size (bitsize, GET_MODE_CLASS (tmode), 0)
1397             : mode);
1398
1399   /* If the bitfield is volatile, we need to make sure the access
1400      remains on a type-aligned boundary.  */
1401   if (GET_CODE (op0) == MEM
1402       && MEM_VOLATILE_P (op0)
1403       && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1404       && flag_strict_volatile_bitfields > 0)
1405     goto no_subreg_mode_swap;
1406
1407   if (((bitsize >= BITS_PER_WORD && bitsize == GET_MODE_BITSIZE (mode)
1408         && bitpos % BITS_PER_WORD == 0)
1409        || (mode1 != BLKmode
1410            /* ??? The big endian test here is wrong.  This is correct
1411               if the value is in a register, and if mode_for_size is not
1412               the same mode as op0.  This causes us to get unnecessarily
1413               inefficient code from the Thumb port when -mbig-endian.  */
1414            && (BYTES_BIG_ENDIAN
1415                ? bitpos + bitsize == BITS_PER_WORD
1416                : bitpos == 0)))
1417       && ((!MEM_P (op0)
1418            && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0))
1419            && GET_MODE_SIZE (mode1) != 0
1420            && byte_offset % GET_MODE_SIZE (mode1) == 0)
1421           || (MEM_P (op0)
1422               && (! SLOW_UNALIGNED_ACCESS (mode, MEM_ALIGN (op0))
1423                   || (offset * BITS_PER_UNIT % bitsize == 0
1424                       && MEM_ALIGN (op0) % bitsize == 0)))))
1425     {
1426       if (MEM_P (op0))
1427         op0 = adjust_bitfield_address (op0, mode1, offset);
1428       else if (mode1 != GET_MODE (op0))
1429         {
1430           rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1431                                          byte_offset);
1432           if (sub == NULL)
1433             goto no_subreg_mode_swap;
1434           op0 = sub;
1435         }
1436       if (mode1 != mode)
1437         return convert_to_mode (tmode, op0, unsignedp);
1438       return op0;
1439     }
1440  no_subreg_mode_swap:
1441
1442   /* Handle fields bigger than a word.  */
1443
1444   if (bitsize > BITS_PER_WORD)
1445     {
1446       /* Here we transfer the words of the field
1447          in the order least significant first.
1448          This is because the most significant word is the one which may
1449          be less than full.  */
1450
1451       unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1452       unsigned int i;
1453       rtx last;
1454
1455       if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1456         target = gen_reg_rtx (mode);
1457
1458       /* Indicate for flow that the entire target reg is being set.  */
1459       emit_clobber (target);
1460
1461       last = get_last_insn ();
1462       for (i = 0; i < nwords; i++)
1463         {
1464           /* If I is 0, use the low-order word in both field and target;
1465              if I is 1, use the next to lowest word; and so on.  */
1466           /* Word number in TARGET to use.  */
1467           unsigned int wordnum
1468             = (WORDS_BIG_ENDIAN
1469                ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1470                : i);
1471           /* Offset from start of field in OP0.  */
1472           unsigned int bit_offset = (WORDS_BIG_ENDIAN
1473                                      ? MAX (0, ((int) bitsize - ((int) i + 1)
1474                                                 * (int) BITS_PER_WORD))
1475                                      : (int) i * BITS_PER_WORD);
1476           rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1477           rtx result_part
1478             = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1479                                              bitsize - i * BITS_PER_WORD),
1480                                    bitnum + bit_offset, 1, false, target_part,
1481                                    mode, word_mode, fallback_p);
1482
1483           gcc_assert (target_part);
1484           if (!result_part)
1485             {
1486               delete_insns_since (last);
1487               return NULL;
1488             }
1489
1490           if (result_part != target_part)
1491             emit_move_insn (target_part, result_part);
1492         }
1493
1494       if (unsignedp)
1495         {
1496           /* Unless we've filled TARGET, the upper regs in a multi-reg value
1497              need to be zero'd out.  */
1498           if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1499             {
1500               unsigned int i, total_words;
1501
1502               total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1503               for (i = nwords; i < total_words; i++)
1504                 emit_move_insn
1505                   (operand_subword (target,
1506                                     WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1507                                     1, VOIDmode),
1508                    const0_rtx);
1509             }
1510           return target;
1511         }
1512
1513       /* Signed bit field: sign-extend with two arithmetic shifts.  */
1514       target = expand_shift (LSHIFT_EXPR, mode, target,
1515                              GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1516       return expand_shift (RSHIFT_EXPR, mode, target,
1517                            GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1518     }
1519
1520   /* From here on we know the desired field is smaller than a word.  */
1521
1522   /* Check if there is a correspondingly-sized integer field, so we can
1523      safely extract it as one size of integer, if necessary; then
1524      truncate or extend to the size that is wanted; then use SUBREGs or
1525      convert_to_mode to get one of the modes we really wanted.  */
1526
1527   int_mode = int_mode_for_mode (tmode);
1528   if (int_mode == BLKmode)
1529     int_mode = int_mode_for_mode (mode);
1530   /* Should probably push op0 out to memory and then do a load.  */
1531   gcc_assert (int_mode != BLKmode);
1532
1533   /* OFFSET is the number of words or bytes (UNIT says which)
1534      from STR_RTX to the first word or byte containing part of the field.  */
1535   if (!MEM_P (op0))
1536     {
1537       if (offset != 0
1538           || GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1539         {
1540           if (!REG_P (op0))
1541             op0 = copy_to_reg (op0);
1542           op0 = gen_rtx_SUBREG (mode_for_size (BITS_PER_WORD, MODE_INT, 0),
1543                                 op0, (offset * UNITS_PER_WORD));
1544         }
1545       offset = 0;
1546     }
1547
1548   /* Now OFFSET is nonzero only for memory operands.  */
1549   ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1550   if (ext_mode != MAX_MACHINE_MODE
1551       && bitsize > 0
1552       && GET_MODE_BITSIZE (ext_mode) >= bitsize
1553       /* Do not use extv/extzv for volatile bitfields when
1554          -fstrict-volatile-bitfields is in effect.  */
1555       && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
1556            && flag_strict_volatile_bitfields > 0)
1557       /* If op0 is a register, we need it in EXT_MODE to make it
1558          acceptable to the format of ext(z)v.  */
1559       && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode)
1560       && !((REG_P (op0) || GET_CODE (op0) == SUBREG)
1561            && (bitsize + bitpos > GET_MODE_BITSIZE (ext_mode))))
1562     {
1563       struct expand_operand ops[4];
1564       unsigned HOST_WIDE_INT xbitpos = bitpos, xoffset = offset;
1565       rtx xop0 = op0;
1566       rtx xtarget = target;
1567       rtx xspec_target = target;
1568       rtx xspec_target_subreg = 0;
1569
1570       /* If op0 is a register, we need it in EXT_MODE to make it
1571          acceptable to the format of ext(z)v.  */
1572       if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1573         xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1574       if (MEM_P (xop0))
1575         /* Get ref to first byte containing part of the field.  */
1576         xop0 = adjust_bitfield_address (xop0, byte_mode, xoffset);
1577
1578       /* Now convert from counting within UNIT to counting in EXT_MODE.  */
1579       if (BYTES_BIG_ENDIAN && !MEM_P (xop0))
1580         xbitpos += GET_MODE_BITSIZE (ext_mode) - unit;
1581
1582       unit = GET_MODE_BITSIZE (ext_mode);
1583
1584       /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1585          "backwards" from the size of the unit we are extracting from.
1586          Otherwise, we count bits from the most significant on a
1587          BYTES/BITS_BIG_ENDIAN machine.  */
1588
1589       if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1590         xbitpos = unit - bitsize - xbitpos;
1591
1592       if (xtarget == 0)
1593         xtarget = xspec_target = gen_reg_rtx (tmode);
1594
1595       if (GET_MODE (xtarget) != ext_mode)
1596         {
1597           /* Don't use LHS paradoxical subreg if explicit truncation is needed
1598              between the mode of the extraction (word_mode) and the target
1599              mode.  Instead, create a temporary and use convert_move to set
1600              the target.  */
1601           if (REG_P (xtarget)
1602               && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1603             {
1604               xtarget = gen_lowpart (ext_mode, xtarget);
1605               if (GET_MODE_PRECISION (ext_mode)
1606                   > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1607                 xspec_target_subreg = xtarget;
1608             }
1609           else
1610             xtarget = gen_reg_rtx (ext_mode);
1611         }
1612
1613       create_output_operand (&ops[0], xtarget, ext_mode);
1614       create_fixed_operand (&ops[1], xop0);
1615       create_integer_operand (&ops[2], bitsize);
1616       create_integer_operand (&ops[3], xbitpos);
1617       if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1618                              4, ops))
1619         {
1620           xtarget = ops[0].value;
1621           if (xtarget == xspec_target)
1622             return xtarget;
1623           if (xtarget == xspec_target_subreg)
1624             return xspec_target;
1625           return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1626         }
1627     }
1628
1629   /* If OP0 is a memory, try copying it to a register and seeing if a
1630      cheap register alternative is available.  */
1631   if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1632     {
1633       enum machine_mode bestmode;
1634
1635       /* Get the mode to use for inserting into this field.  If
1636          OP0 is BLKmode, get the smallest mode consistent with the
1637          alignment. If OP0 is a non-BLKmode object that is no
1638          wider than EXT_MODE, use its mode. Otherwise, use the
1639          smallest mode containing the field.  */
1640
1641       if (GET_MODE (op0) == BLKmode
1642           || (ext_mode != MAX_MACHINE_MODE
1643               && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1644         bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1645                                   (ext_mode == MAX_MACHINE_MODE
1646                                    ? VOIDmode : ext_mode),
1647                                   MEM_VOLATILE_P (op0));
1648       else
1649         bestmode = GET_MODE (op0);
1650
1651       if (bestmode != VOIDmode
1652           && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1653                && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1654         {
1655           unsigned HOST_WIDE_INT xoffset, xbitpos;
1656
1657           /* Compute the offset as a multiple of this unit,
1658              counting in bytes.  */
1659           unit = GET_MODE_BITSIZE (bestmode);
1660           xoffset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1661           xbitpos = bitnum % unit;
1662
1663           /* Make sure the register is big enough for the whole field.  */
1664           if (xoffset * BITS_PER_UNIT + unit
1665               >= offset * BITS_PER_UNIT + bitsize)
1666             {
1667               rtx last, result, xop0;
1668
1669               last = get_last_insn ();
1670
1671               /* Fetch it to a register in that size.  */
1672               xop0 = adjust_bitfield_address (op0, bestmode, xoffset);
1673               xop0 = force_reg (bestmode, xop0);
1674               result = extract_bit_field_1 (xop0, bitsize, xbitpos,
1675                                             unsignedp, packedp, target,
1676                                             mode, tmode, false);
1677               if (result)
1678                 return result;
1679
1680               delete_insns_since (last);
1681             }
1682         }
1683     }
1684
1685   if (!fallback_p)
1686     return NULL;
1687
1688   target = extract_fixed_bit_field (int_mode, op0, offset, bitsize,
1689                                     bitpos, target, unsignedp, packedp);
1690   return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1691 }
1692
1693 /* Emit code that reads a bit-field of BITSIZE bits, starting at bit
1694    BITNUM, out of STR_RTX, and return the rtx in which the value ends up.
1695
1696    STR_RTX is the object holding the field (a REG or MEM).
1697    UNSIGNEDP is nonzero when the field is unsigned.
1698    PACKEDP is nonzero when the field has the packed attribute.
1699    MODE is the natural mode of the extracted value.
1700    TMODE is the mode the caller would like for the result, although the
1701    value may come back in MODE instead.
1702
1703    A nonzero TARGET is a suggested destination; it is used, and returned,
1704    when storing there costs nothing extra.  Otherwise the result is a REG
1705    of mode TMODE or MODE, TMODE being preferred if both are equally easy.
1706
1707    The work is done by extract_bit_field_1 with a final argument of true.  */
1708
1709 rtx
1710 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1711                    unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1712                    rtx target, enum machine_mode mode, enum machine_mode tmode)
1713 {
1714   rtx value = extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp,
1715                                    packedp, target, mode, tmode, true);
1716   return value;
1717 }
1718 \f
1719 /* Extract a bit field using shifts and boolean operations.
1720    Returns an rtx to represent the value.
1721    OP0 addresses a register (word) or memory (byte).
1722    BITPOS says which bit within the word or byte the bit field starts in.
1723    OFFSET says how many bytes farther the bit field starts;
1724     it is 0 if OP0 is a register.
1725    BITSIZE says how many bits long the bit field is.
1726     (If OP0 is a register, it may be narrower than a full word,
1727      but BITPOS still counts within a full word,
1728      which is significant on bigendian machines.)
1729
1730    UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1731    PACKEDP is true if the field has the packed attribute.
         It is consulted only in the STRICT_ALIGNMENT strict-volatile-bitfields
         case below, where a packed field is read with extract_split_bit_field.
1732
1733    If TARGET is nonzero, attempts to store the value there
1734    and return TARGET, but this is not guaranteed.
1735    If TARGET is not used, create a pseudo-reg of mode TMODE for the value.

        A register field with BITSIZE + BITPOS > BITS_PER_WORD, and a memory
        field for which no access mode is found, are handed to
        extract_split_bit_field instead of being extracted here.  */
1736
1737 static rtx
1738 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1739                          unsigned HOST_WIDE_INT offset,
1740                          unsigned HOST_WIDE_INT bitsize,
1741                          unsigned HOST_WIDE_INT bitpos, rtx target,
1742                          int unsignedp, bool packedp)
1743 {
       /* Width in bits of the unit OP0 is read in: a full word for a register,
          replaced below by the bit size of the access mode for memory.  */
1744   unsigned int total_bits = BITS_PER_WORD;
1745   enum machine_mode mode;
1746
1747   if (GET_CODE (op0) == SUBREG || REG_P (op0))
1748     {
1749       /* Special treatment for a bit field split across two registers.  */
1750       if (bitsize + bitpos > BITS_PER_WORD)
1751         return extract_split_bit_field (op0, bitsize, bitpos, unsignedp);
1752     }
1753   else
1754     {
1755       /* Get the proper mode to use for this field.  We want a mode that
1756          includes the entire field.  If such a mode would be larger than
1757          a word, we won't be doing the extraction the normal way.  */
1758
           /* For a volatile MEM under flag_strict_volatile_bitfields the mode
              is taken from OP0 if it has a nonzero bit size, else from TARGET,
              else TMODE; get_best_mode is only used otherwise.  */
1759       if (MEM_VOLATILE_P (op0)
1760           && flag_strict_volatile_bitfields > 0)
1761         {
1762           if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1763             mode = GET_MODE (op0);
1764           else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1765             mode = GET_MODE (target);
1766           else
1767             mode = tmode;
1768         }
1769       else
1770         mode = get_best_mode (bitsize, bitpos + offset * BITS_PER_UNIT, 0, 0,
1771                               MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1772
1773       if (mode == VOIDmode)
1774         /* The only way this should occur is if the field spans word
1775            boundaries.  */
1776         return extract_split_bit_field (op0, bitsize,
1777                                         bitpos + offset * BITS_PER_UNIT,
1778                                         unsignedp);
1779
1780       total_bits = GET_MODE_BITSIZE (mode);
1781
1782       /* Make sure bitpos is valid for the chosen mode.  Adjust BITPOS to
1783          be in the range 0 to total_bits-1, and put any excess bytes in
1784          OFFSET.  */
1785       if (bitpos >= total_bits)
1786         {
1787           offset += (bitpos / total_bits) * (total_bits / BITS_PER_UNIT);
1788           bitpos -= ((bitpos / total_bits) * (total_bits / BITS_PER_UNIT)
1789                      * BITS_PER_UNIT);
1790         }
1791
1792       /* If we're accessing a volatile MEM, we can't do the next
1793          alignment step if it results in a multi-word access where we
1794          otherwise wouldn't have one.  So, check for that case
1795          here.  */
1796       if (MEM_P (op0)
1797           && MEM_VOLATILE_P (op0)
1798           && flag_strict_volatile_bitfields > 0
1799           && bitpos + bitsize <= total_bits
1800           && bitpos + bitsize + (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT > total_bits)
1801         {
               /* In this arm BITPOS and OFFSET are deliberately left as they
                  are; only the else arm below aligns them.  */
1802           if (STRICT_ALIGNMENT)
1803             {
1804               static bool informed_about_misalignment = false;
1805               bool warned;
1806
                   /* Packed field: warn, then read it in pieces.  WARNED is
                      not examined on this path.  */
1807               if (packedp)
1808                 {
1809                   if (bitsize == total_bits)
1810                     warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1811                                          "multiple accesses to volatile structure member"
1812                                          " because of packed attribute");
1813                   else
1814                     warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1815                                          "multiple accesses to volatile structure bitfield"
1816                                          " because of packed attribute");
1817
1818                   return extract_split_bit_field (op0, bitsize,
1819                                                   bitpos + offset * BITS_PER_UNIT,
1820                                                   unsignedp);
1821                 }
1822
                   /* Not packed: warn about the misaligned access and carry on
                      with the extraction below.  */
1823               if (bitsize == total_bits)
1824                 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1825                                      "mis-aligned access used for structure member");
1826               else
1827                 warned = warning_at (input_location, OPT_fstrict_volatile_bitfields,
1828                                      "mis-aligned access used for structure bitfield");
1829
                   /* Emit the explanatory note only the first time one of the
                      warnings above was actually issued.  */
1830               if (! informed_about_misalignment && warned)
1831                 {
1832                   informed_about_misalignment = true;
1833                   inform (input_location,
1834                           "when a volatile object spans multiple type-sized locations,"
1835                           " the compiler must choose between using a single mis-aligned access to"
1836                           " preserve the volatility, or using multiple aligned accesses to avoid"
1837                           " runtime faults; this code may fail at runtime if the hardware does"
1838                           " not allow this access");
1839                 }
1840             }
1841         }
1842       else
1843         {
1844
1845           /* Get ref to an aligned byte, halfword, or word containing the field.
1846              Adjust BITPOS to be position within a word,
1847              and OFFSET to be the offset of that word.
1848              Then alter OP0 to refer to that word.  */
1849           bitpos += (offset % (total_bits / BITS_PER_UNIT)) * BITS_PER_UNIT;
1850           offset -= (offset % (total_bits / BITS_PER_UNIT));
1851         }
1852
1853       op0 = adjust_bitfield_address (op0, mode, offset);
1854     }
1855
       /* From here on MODE is whatever mode OP0 now has.  */
1856   mode = GET_MODE (op0);
1857
1858   if (BYTES_BIG_ENDIAN)
1859     /* BITPOS is the distance between our msb and that of OP0.
1860        Convert it to the distance from the lsb.  */
1861     bitpos = total_bits - bitsize - bitpos;
1862
1863   /* Now BITPOS is always the distance between the field's lsb and that of OP0.
1864      We have reduced the big-endian case to the little-endian case.  */
1865
1866   if (unsignedp)
1867     {
1868       if (bitpos)
1869         {
1870           /* If the field does not already start at the lsb,
1871              shift it so it does.  */
1872           /* Maybe propagate the target for the shift.  */
1873           rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
               /* The shift is done in MODE, so TARGET is only usable as its
                  destination when TMODE is that same mode.  */
1874           if (tmode != mode)
1875             subtarget = 0;
1876           op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitpos, subtarget, 1);
1877         }
1878       /* Convert the value to the desired mode.  */
1879       if (mode != tmode)
1880         op0 = convert_to_mode (tmode, op0, 1);
1881
1882       /* Unless the msb of the field used to be the msb when we shifted,
1883          mask out the upper bits.  */
1884
1885       if (GET_MODE_BITSIZE (mode) != bitpos + bitsize)
1886         return expand_binop (GET_MODE (op0), and_optab, op0,
1887                              mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1888                              target, 1, OPTAB_LIB_WIDEN);
1889       return op0;
1890     }
1891
1892   /* To extract a signed bit-field, first shift its msb to the msb of the word,
1893      then arithmetic-shift its lsb to the lsb of the word.  */
1894   op0 = force_reg (mode, op0);
1895
1896   /* Find the narrowest integer mode that contains the field.  */
1897
1898   for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1899        mode = GET_MODE_WIDER_MODE (mode))
1900     if (GET_MODE_BITSIZE (mode) >= bitsize + bitpos)
1901       {
1902         op0 = convert_to_mode (mode, op0, 0);
1903         break;
1904       }
1905
       /* The shifts below are done in MODE; TARGET can only receive their
          result when TMODE is that same mode.  */
1906   if (mode != tmode)
1907     target = 0;
1908
1909   if (GET_MODE_BITSIZE (mode) != (bitsize + bitpos))
1910     {
1911       int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitpos);
1912       /* Maybe propagate the target for the shift.  */
1913       rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1914       op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1915     }
1916
       /* Final right shift with an UNSIGNEDP argument of 0, i.e. arithmetic,
          which brings the field's lsb down to bit 0.  */
1917   return expand_shift (RSHIFT_EXPR, mode, op0,
1918                        GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1919 }
1920 \f
1921 /* Build a constant rtx (CONST_INT or CONST_DOUBLE) of mode MODE holding a
1922    bit mask: BITSIZE one bits placed above BITPOS zero bits.  A nonzero
1923    COMPLEMENT asks for the bitwise inverse of that pattern.  The value is
1924    truncated to the width of MODE if necessary, and zero-extended when
1925    BITSIZE+BITPOS does not fill MODE.  */
1926
1927 static rtx
1928 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1929 {
1930   /* BITSIZE low-order ones, moved up so that they start at bit BITPOS.  */
1931   double_int ones = double_int::mask (bitsize);
1932   double_int field = ones.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1933
1934   if (!complement)
1935     return immed_double_int_const (field, mode);
1936
1937   /* The caller wants every bit outside the field instead.  */
1938   return immed_double_int_const (~field, mode);
1939 }
1940
1941 /* Take the integer held by VALUE, keep only its low BITSIZE bits, move them
1942    left by BITPOS and return the result as a constant rtx for mode MODE.  */
1943
1944 static rtx
1945 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1946 {
1947   /* Zero-extend from BITSIZE bits so nothing above the field survives.  */
1948   double_int low_bits = double_int::from_uhwi (INTVAL (value)).zext (bitsize);
1949
1950   /* Slide the field up to its final position.  */
1951   double_int placed = low_bits.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1952   return immed_double_int_const (placed, mode);
1953 }
1954 \f
1955 /* Extract a bit field that is split across two words
1956    and return an RTX for the result.
1957
1958    OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1959    BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1960    UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend.

        The field is read piece by piece with extract_fixed_bit_field, each
        piece as an unsigned word_mode value, and the pieces are shifted into
        place and combined with inclusive-or in word_mode.  */
1961
1962 static rtx
1963 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1964                          unsigned HOST_WIDE_INT bitpos, int unsignedp)
1965 {
1966   unsigned int unit;
1967   unsigned int bitsdone = 0;
1968   rtx result = NULL_RTX;
1969   int first = 1;
1970
1971   /* Make sure UNIT isn't larger than BITS_PER_WORD, we can only handle that
1972      much at a time.  */
1973   if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1974     unit = BITS_PER_WORD;
1975   else
1976     unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1977
       /* Each iteration extracts the next THISSIZE bits of the field.  */
1978   while (bitsdone < bitsize)
1979     {
1980       unsigned HOST_WIDE_INT thissize;
1981       rtx part, word;
1982       unsigned HOST_WIDE_INT thispos;
1983       unsigned HOST_WIDE_INT offset;
1984
           /* Locate the next unprocessed bit: OFFSET counts whole UNITs,
              THISPOS is the bit position inside that unit.  */
1985       offset = (bitpos + bitsdone) / unit;
1986       thispos = (bitpos + bitsdone) % unit;
1987
1988       /* THISSIZE must not overrun a word boundary.  Otherwise,
1989          extract_fixed_bit_field will call us again, and we will mutually
1990          recurse forever.  */
1991       thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1992       thissize = MIN (thissize, unit - thispos);
1993
1994       /* If OP0 is a register, then handle OFFSET here.
1995
1996          When handling multiword bitfields, extract_bit_field may pass
1997          down a word_mode SUBREG of a larger REG for a bitfield that actually
1998          crosses a word boundary.  Thus, for a SUBREG, we must find
1999          the current word starting from the base register.  */
2000       if (GET_CODE (op0) == SUBREG)
2001         {
2002           int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
2003           word = operand_subword_force (SUBREG_REG (op0), word_offset,
2004                                         GET_MODE (SUBREG_REG (op0)));
2005           offset = 0;
2006         }
2007       else if (REG_P (op0))
2008         {
2009           word = operand_subword_force (op0, offset, GET_MODE (op0));
2010           offset = 0;
2011         }
2012       else
2013         word = op0;
2014
2015       /* Extract the parts in bit-counting order,
2016          whose meaning is determined by BYTES_PER_UNIT.
2017          OFFSET is in UNITs, and UNIT is in bits.
2018          extract_fixed_bit_field wants offset in bytes.
              The trailing arguments 0, 1, false mean: no target, unsigned,
              not packed.
              NOTE(review): "BYTES_PER_UNIT" above presumably means
              BYTES_BIG_ENDIAN, which is what the placement code below
              tests -- confirm.  */
2019       part = extract_fixed_bit_field (word_mode, word,
2020                                       offset * unit / BITS_PER_UNIT,
2021                                       thissize, thispos, 0, 1, false);
2022       bitsdone += thissize;
2023
2024       /* Shift this part into place for the result.
              With BYTES_BIG_ENDIAN the part is shifted left by the number of
              field bits still to come; otherwise by the number of field bits
              extracted before this part.  */
2025       if (BYTES_BIG_ENDIAN)
2026         {
2027           if (bitsize != bitsdone)
2028             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2029                                  bitsize - bitsdone, 0, 1);
2030         }
2031       else
2032         {
2033           if (bitsdone != thissize)
2034             part = expand_shift (LSHIFT_EXPR, word_mode, part,
2035                                  bitsdone - thissize, 0, 1);
2036         }
2037
2038       if (first)
2039         result = part;
2040       else
2041         /* Combine the parts with bitwise or.  This works
2042            because we extracted each part as an unsigned bit field.  */
2043         result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
2044                                OPTAB_LIB_WIDEN);
2045
2046       first = 0;
2047     }
2048
2049   /* Unsigned bit field: we are done.  */
2050   if (unsignedp)
2051     return result;
2052   /* Signed bit field: sign-extend with two arithmetic shifts.  */
2053   result = expand_shift (LSHIFT_EXPR, word_mode, result,
2054                          BITS_PER_WORD - bitsize, NULL_RTX, 0);
2055   return expand_shift (RSHIFT_EXPR, word_mode, result,
2056                        BITS_PER_WORD - bitsize, NULL_RTX, 0);
2057 }
2058 \f
2059 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
2060    the bit pattern.  SRC_MODE is the mode of SRC; if this is smaller than
2061    MODE, fill the upper bits with zeros.  Fail if the layout of either
2062    mode is unknown (as for CC modes) or if the extraction would involve
2063    unprofitable mode punning.  Return the value on success, otherwise
2064    return null.
2065
2066    This is different from gen_lowpart* in these respects:
2067
2068      - the returned value must always be considered an rvalue
2069
2070      - when MODE is wider than SRC_MODE, the extraction involves
2071        a zero extension
2072
2073      - when MODE is smaller than SRC_MODE, the extraction involves
2074        a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2075
2076    In other words, this routine performs a computation, whereas the
2077    gen_lowpart* routines are conceptually lvalue or rvalue subreg
2078    operations.  */
2079
2080 rtx
2081 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2082 {
2083   enum machine_mode int_mode, src_int_mode;
2084
       /* Same mode: SRC already is the answer.  */
2085   if (mode == src_mode)
2086     return src;
2087
2088   if (CONSTANT_P (src))
2089     {
2090       /* simplify_gen_subreg can't be used here, as if simplify_subreg
2091          fails, it will happily create (subreg (symbol_ref)) or similar
2092          invalid SUBREGs.  */
2093       unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2094       rtx ret = simplify_subreg (mode, src, src_mode, byte);
2095       if (ret)
2096         return ret;
2097
           /* No simplification: give up for a modeless constant or when the
              SUBREG would not validate; otherwise load the constant into a
              register and take the lowpart SUBREG of that.  */
2098       if (GET_MODE (src) == VOIDmode
2099           || !validate_subreg (mode, src_mode, src, byte))
2100         return NULL_RTX;
2101
2102       src = force_reg (GET_MODE (src), src);
2103       return gen_rtx_SUBREG (mode, src, byte);
2104     }
2105
       /* CC modes are rejected outright (unknown layout, see above).  */
2106   if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2107     return NULL_RTX;
2108
       /* Equal-sized, tieable modes: try a plain lowpart first; if that
          yields nothing, fall through to the integer-mode route below.  */
2109   if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2110       && MODES_TIEABLE_P (mode, src_mode))
2111     {
2112       rtx x = gen_lowpart_common (mode, src);
2113       if (x)
2114         return x;
2115     }
2116
       /* Otherwise go through integer modes obtained from int_mode_for_mode;
          fail if either mode has none (BLKmode) or is not tieable with its
          integer mode.  */
2117   src_int_mode = int_mode_for_mode (src_mode);
2118   int_mode = int_mode_for_mode (mode);
2119   if (src_int_mode == BLKmode || int_mode == BLKmode)
2120     return NULL_RTX;
2121
2122   if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2123     return NULL_RTX;
2124   if (!MODES_TIEABLE_P (int_mode, mode))
2125     return NULL_RTX;
2126
       /* View SRC as an integer, convert it between the two integer modes
          as unsigned (last argument true), then view the result as MODE.  */
2127   src = gen_lowpart (src_int_mode, src);
2128   src = convert_modes (int_mode, src_int_mode, src, true);
2129   src = gen_lowpart (mode, src);
2130   return src;
2131 }
2132 \f
2133 /* Emit code computing TARGET + INC and store the result back in TARGET.  */
2134
2135 void
2136 expand_inc (rtx target, rtx inc)
2137 {
2138   enum machine_mode sum_mode = GET_MODE (target);
2139   rtx sum = expand_binop (sum_mode, add_optab, target, inc, target, 0,
2140                           OPTAB_LIB_WIDEN);
2141   if (sum != target)  /* Result landed elsewhere; copy it into TARGET.  */
2142     emit_move_insn (target, sum);
2143 }
2144
2145 /* Emit code computing TARGET - DEC and store the result back in TARGET.  */
2146
2147 void
2148 expand_dec (rtx target, rtx dec)
2149 {
2150   enum machine_mode diff_mode = GET_MODE (target);
2151   rtx diff = expand_binop (diff_mode, sub_optab, target, dec, target, 0,
2152                            OPTAB_LIB_WIDEN);
2153   if (diff != target)  /* Result landed elsewhere; copy it into TARGET.  */
2154     emit_move_insn (target, diff);
2155 }
2156 \f
2157 /* Output a shift instruction for expression code CODE,
2158    with SHIFTED being the rtx for the value to shift,
2159    and AMOUNT the rtx for the amount to shift by.
2160    Store the result in the rtx TARGET, if that is convenient.
2161    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2162    Return the rtx for where the value is.

        MODE is the mode the operation is expanded in.  CODE is treated as a
        left operation for LSHIFT_EXPR and LROTATE_EXPR, and as a rotate for
        LROTATE_EXPR and RROTATE_EXPR; anything else is a right shift.
        When both MODE and the mode of AMOUNT are vector modes, the
        vector/vector optabs are used.  */
2163
2164 static rtx
2165 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2166                 rtx amount, rtx target, int unsignedp)
2167 {
2168   rtx op1, temp = 0;
2169   int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2170   int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2171   optab lshift_optab = ashl_optab;
2172   optab rshift_arith_optab = ashr_optab;
2173   optab rshift_uns_optab = lshr_optab;
2174   optab lrotate_optab = rotl_optab;
2175   optab rrotate_optab = rotr_optab;
2176   enum machine_mode op1_mode;
2177   int attempt;
2178   bool speed = optimize_insn_for_speed_p ();
2179
2180   op1 = amount;
2181   op1_mode = GET_MODE (op1);
2182
2183   /* Determine whether the shift/rotate amount is a vector, or scalar.  If the
2184      shift amount is a vector, use the vector/vector shift patterns.  */
2185   if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2186     {
2187       lshift_optab = vashl_optab;
2188       rshift_arith_optab = vashr_optab;
2189       rshift_uns_optab = vlshr_optab;
2190       lrotate_optab = vrotl_optab;
2191       rrotate_optab = vrotr_optab;
2192     }
2193
2194   /* Previously detected shift-counts computed by NEGATE_EXPR
2195      and shifted in the other direction; but that does not work
2196      on all machines.  */
2197
       /* With SHIFT_COUNT_TRUNCATED, a constant count that is not below the
          bit size of MODE is reduced modulo that size, and a lowpart SUBREG
          of an integral-mode value is replaced by the inner value.  */
2198   if (SHIFT_COUNT_TRUNCATED)
2199     {
2200       if (CONST_INT_P (op1)
2201           && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2202               (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2203         op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2204                        % GET_MODE_BITSIZE (mode));
2205       else if (GET_CODE (op1) == SUBREG
2206                && subreg_lowpart_p (op1)
2207                && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2208         op1 = SUBREG_REG (op1);
2209     }
2210
       /* A count of constant zero needs no code at all.  */
2211   if (op1 == const0_rtx)
2212     return shifted;
2213
2214   /* Check whether its cheaper to implement a left shift by a constant
2215      bit count by a sequence of additions.
          Each addition below adds the running value to itself.  */
2216   if (code == LSHIFT_EXPR
2217       && CONST_INT_P (op1)
2218       && INTVAL (op1) > 0
2219       && INTVAL (op1) < GET_MODE_PRECISION (mode)
2220       && INTVAL (op1) < MAX_BITS_PER_WORD
2221       && (shift_cost (speed, mode, INTVAL (op1))
2222           > INTVAL (op1) * add_cost (speed, mode))
2223       && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2224     {
2225       int i;
2226       for (i = 0; i < INTVAL (op1); i++)
2227         {
2228           temp = force_reg (mode, shifted);
2229           shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2230                                   unsignedp, OPTAB_LIB_WIDEN);
2231         }
2232       return shifted;
2233     }
2234
       /* Up to three attempts with increasingly permissive optab methods
          (OPTAB_DIRECT, OPTAB_WIDEN, OPTAB_LIB_WIDEN); the loop ends as soon
          as one of them produces a result in TEMP.  */
2235   for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2236     {
2237       enum optab_methods methods;
2238
2239       if (attempt == 0)
2240         methods = OPTAB_DIRECT;
2241       else if (attempt == 1)
2242         methods = OPTAB_WIDEN;
2243       else
2244         methods = OPTAB_LIB_WIDEN;
2245
2246       if (rotate)
2247         {
2248           /* Widening does not work for rotation.  */
2249           if (methods == OPTAB_WIDEN)
2250             continue;
2251           else if (methods == OPTAB_LIB_WIDEN)
2252             {
2253               /* If we have been unable to open-code this by a rotation,
2254                  do it as the IOR of two shifts.  I.e., to rotate A
2255                  by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2256                  where C is the bitsize of A.
2257
2258                  It is theoretically possible that the target machine might
2259                  not be able to perform either shift and hence we would
2260                  be making two libcalls rather than just the one for the
2261                  shift (similarly if IOR could not be done).  We will allow
2262                  this extremely unlikely lossage to avoid complicating the
2263                  code below.
                      
                      NOTE(review): the constant branch below computes C from
                      GET_MODE_BITSIZE while the variable branch uses
                      GET_MODE_PRECISION -- confirm the two always agree for
                      the modes reaching here.  Also, for a variable N that
                      is zero at run time the second shift count is C itself;
                      verify that this is acceptable on all targets.  */
2264
2265               rtx subtarget = target == shifted ? 0 : target;
2266               rtx new_amount, other_amount;
2267               rtx temp1;
2268
2269               new_amount = op1;
2270               if (CONST_INT_P (op1))
2271                 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2272                                         - INTVAL (op1));
2273               else
2274                 other_amount
2275                   = simplify_gen_binary (MINUS, GET_MODE (op1),
2276                                          GEN_INT (GET_MODE_PRECISION (mode)),
2277                                          op1);
2278
2279               shifted = force_reg (mode, shifted);
2280
                   /* Both halves are logical shifts (UNSIGNEDP argument 1),
                      in opposite directions.  */
2281               temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2282                                      mode, shifted, new_amount, 0, 1);
2283               temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2284                                       mode, shifted, other_amount,
2285                                       subtarget, 1);
2286               return expand_binop (mode, ior_optab, temp, temp1, target,
2287                                    unsignedp, methods);
2288             }
2289
               /* Only reached with OPTAB_DIRECT: try the rotate optab.  */
2290           temp = expand_binop (mode,
2291                                left ? lrotate_optab : rrotate_optab,
2292                                shifted, op1, target, unsignedp, methods);
2293         }
2294       else if (unsignedp)
2295         temp = expand_binop (mode,
2296                              left ? lshift_optab : rshift_uns_optab,
2297                              shifted, op1, target, unsignedp, methods);
2298
2299       /* Do arithmetic shifts.
2300          Also, if we are going to widen the operand, we can just as well
2301          use an arithmetic right-shift instead of a logical one.  */
2302       if (temp == 0 && ! rotate
2303           && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2304         {
2305           enum optab_methods methods1 = methods;
2306
2307           /* If trying to widen a log shift to an arithmetic shift,
2308              don't accept an arithmetic shift of the same size.  */
2309           if (unsignedp)
2310             methods1 = OPTAB_MUST_WIDEN;
2311
2312           /* Arithmetic shift */
2313
2314           temp = expand_binop (mode,
2315                                left ? lshift_optab : rshift_arith_optab,
2316                                shifted, op1, target, unsignedp, methods1);
2317         }
2318
2319       /* We used to try extzv here for logical right shifts, but that was
2320          only useful for one machine, the VAX, and caused poor code
2321          generation there for lshrdi3, so the code was deleted and a
2322          define_expand for lshrsi3 was added to vax.md.  */
2323     }
2324
       /* Callers are never handed a null result.  */
2325   gcc_assert (temp);
2326   return temp;
2327 }
2328
2329 /* Output a shift instruction for expression code CODE,
2330    with SHIFTED being the rtx for the value to shift,
2331    and AMOUNT the amount to shift by.
2332    Store the result in the rtx TARGET, if that is convenient.
2333    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2334    Return the rtx for where the value is.  */
2335
2336 rtx
2337 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2338               int amount, rtx target, int unsignedp)
2339 {
2340   return expand_shift_1 (code, mode,
2341                          shifted, GEN_INT (amount), target, unsignedp);
2342 }
2343
2344 /* Output a shift instruction for expression code CODE,
2345    with SHIFTED being the rtx for the value to shift,
2346    and AMOUNT the tree for the amount to shift by.
2347    Store the result in the rtx TARGET, if that is convenient.
2348    If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2349    Return the rtx for where the value is.  */
2350
2351 rtx
2352 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2353                        tree amount, rtx target, int unsignedp)
2354 {
2355   return expand_shift_1 (code, mode,
2356                          shifted, expand_normal (amount), target, unsignedp);
2357 }
2358
2359 \f
2360 /* Indicates the type of fixup needed after a constant multiplication.
2361    BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2362    the result should be negated, and ADD_VARIANT means that the
2363    multiplicand should be added to the result.  */
2364 enum mult_variant {basic_variant, negate_variant, add_variant};
2365
2366 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2367                         const struct mult_cost *, enum machine_mode mode);
2368 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2369                                  struct algorithm *, enum mult_variant *, int);
2370 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2371                               const struct algorithm *, enum mult_variant);
2372 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2373 static rtx extract_high_half (enum machine_mode, rtx);
2374 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2375 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2376                                        int, int);
2377 /* Compute and return the best algorithm for multiplying by T.
2378    The algorithm must cost less than cost_limit
2379    If retval.cost >= COST_LIMIT, no algorithm was found and all
2380    other field of the returned struct are undefined.
2381    MODE is the machine mode of the multiplication.  */
2382
2383 static void
2384 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2385             const struct mult_cost *cost_limit, enum machine_mode mode)
2386 {
2387   int m;
2388   struct algorithm *alg_in, *best_alg;
2389   struct mult_cost best_cost;
2390   struct mult_cost new_limit;
2391   int op_cost, op_latency;
2392   unsigned HOST_WIDE_INT orig_t = t;
2393   unsigned HOST_WIDE_INT q;
2394   int maxm, hash_index;
2395   bool cache_hit = false;
2396   enum alg_code cache_alg = alg_zero;
2397   bool speed = optimize_insn_for_speed_p ();
2398   enum machine_mode imode;
2399   struct alg_hash_entry *entry_ptr;
2400
2401   /* Indicate that no algorithm is yet found.  If no algorithm
2402      is found, this value will be returned and indicate failure.  */
2403   alg_out->cost.cost = cost_limit->cost + 1;
2404   alg_out->cost.latency = cost_limit->latency + 1;
2405
2406   if (cost_limit->cost < 0
2407       || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2408     return;
2409
2410   /* Be prepared for vector modes.  */
2411   imode = GET_MODE_INNER (mode);
2412   if (imode == VOIDmode)
2413     imode = mode;
2414
2415   maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2416
2417   /* Restrict the bits of "t" to the multiplication's mode.  */
2418   t &= GET_MODE_MASK (imode);
2419
2420   /* t == 1 can be done in zero cost.  */
2421   if (t == 1)
2422     {
2423       alg_out->ops = 1;
2424       alg_out->cost.cost = 0;
2425       alg_out->cost.latency = 0;
2426       alg_out->op[0] = alg_m;
2427       return;
2428     }
2429
2430   /* t == 0 sometimes has a cost.  If it does and it exceeds our limit,
2431      fail now.  */
2432   if (t == 0)
2433     {
2434       if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2435         return;
2436       else
2437         {
2438           alg_out->ops = 1;
2439           alg_out->cost.cost = zero_cost (speed);
2440           alg_out->cost.latency = zero_cost (speed);
2441           alg_out->op[0] = alg_zero;
2442           return;
2443         }
2444     }
2445
2446   /* We'll be needing a couple extra algorithm structures now.  */
2447
2448   alg_in = XALLOCA (struct algorithm);
2449   best_alg = XALLOCA (struct algorithm);
2450   best_cost = *cost_limit;
2451
2452   /* Compute the hash index.  */
2453   hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2454
2455   /* See if we already know what to do for T.  */
2456   entry_ptr = alg_hash_entry_ptr (hash_index);
2457   if (entry_ptr->t == t
2458       && entry_ptr->mode == mode
2459       && entry_ptr->mode == mode
2460       && entry_ptr->speed == speed
2461       && entry_ptr->alg != alg_unknown)
2462     {
2463       cache_alg = entry_ptr->alg;
2464
2465       if (cache_alg == alg_impossible)
2466         {
2467           /* The cache tells us that it's impossible to synthesize
2468              multiplication by T within entry_ptr->cost.  */
2469           if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2470             /* COST_LIMIT is at least as restrictive as the one
2471                recorded in the hash table, in which case we have no
2472                hope of synthesizing a multiplication.  Just
2473                return.  */
2474             return;
2475
2476           /* If we get here, COST_LIMIT is less restrictive than the
2477              one recorded in the hash table, so we may be able to
2478              synthesize a multiplication.  Proceed as if we didn't
2479              have the cache entry.  */
2480         }
2481       else
2482         {
2483           if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2484             /* The cached algorithm shows that this multiplication
2485                requires more cost than COST_LIMIT.  Just return.  This
2486                way, we don't clobber this cache entry with
2487                alg_impossible but retain useful information.  */
2488             return;
2489
2490           cache_hit = true;
2491
2492           switch (cache_alg)
2493             {
2494             case alg_shift:
2495               goto do_alg_shift;
2496
2497             case alg_add_t_m2:
2498             case alg_sub_t_m2:
2499               goto do_alg_addsub_t_m2;
2500
2501             case alg_add_factor:
2502             case alg_sub_factor:
2503               goto do_alg_addsub_factor;
2504
2505             case alg_add_t2_m:
2506               goto do_alg_add_t2_m;
2507
2508             case alg_sub_t2_m:
2509               goto do_alg_sub_t2_m;
2510
2511             default:
2512               gcc_unreachable ();
2513             }
2514         }
2515     }
2516
2517   /* If we have a group of zero bits at the low-order part of T, try
2518      multiplying by the remaining bits and then doing a shift.  */
2519
2520   if ((t & 1) == 0)
2521     {
2522     do_alg_shift:
2523       m = floor_log2 (t & -t);  /* m = number of low zero bits */
2524       if (m < maxm)
2525         {
2526           q = t >> m;
2527           /* The function expand_shift will choose between a shift and
2528              a sequence of additions, so the observed cost is given as
2529              MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)).  */
2530           op_cost = m * add_cost (speed, mode);
2531           if (shift_cost (speed, mode, m) < op_cost)
2532             op_cost = shift_cost (speed, mode, m);
2533           new_limit.cost = best_cost.cost - op_cost;
2534           new_limit.latency = best_cost.latency - op_cost;
2535           synth_mult (alg_in, q, &new_limit, mode);
2536
2537           alg_in->cost.cost += op_cost;
2538           alg_in->cost.latency += op_cost;
2539           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2540             {
2541               struct algorithm *x;
2542               best_cost = alg_in->cost;
2543               x = alg_in, alg_in = best_alg, best_alg = x;
2544               best_alg->log[best_alg->ops] = m;
2545               best_alg->op[best_alg->ops] = alg_shift;
2546             }
2547
2548           /* See if treating ORIG_T as a signed number yields a better
2549              sequence.  Try this sequence only for a negative ORIG_T
2550              as it would be useless for a non-negative ORIG_T.  */
2551           if ((HOST_WIDE_INT) orig_t < 0)
2552             {
2553               /* Shift ORIG_T as follows because a right shift of a
2554                  negative-valued signed type is implementation
2555                  defined.  */
2556               q = ~(~orig_t >> m);
2557               /* The function expand_shift will choose between a shift
2558                  and a sequence of additions, so the observed cost is
2559                  given as MIN (m * add_cost(speed, mode),
2560                  shift_cost(speed, mode, m)).  */
2561               op_cost = m * add_cost (speed, mode);
2562               if (shift_cost (speed, mode, m) < op_cost)
2563                 op_cost = shift_cost (speed, mode, m);
2564               new_limit.cost = best_cost.cost - op_cost;
2565               new_limit.latency = best_cost.latency - op_cost;
2566               synth_mult (alg_in, q, &new_limit, mode);
2567
2568               alg_in->cost.cost += op_cost;
2569               alg_in->cost.latency += op_cost;
2570               if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2571                 {
2572                   struct algorithm *x;
2573                   best_cost = alg_in->cost;
2574                   x = alg_in, alg_in = best_alg, best_alg = x;
2575                   best_alg->log[best_alg->ops] = m;
2576                   best_alg->op[best_alg->ops] = alg_shift;
2577                 }
2578             }
2579         }
2580       if (cache_hit)
2581         goto done;
2582     }
2583
2584   /* If we have an odd number, add or subtract one.  */
2585   if ((t & 1) != 0)
2586     {
2587       unsigned HOST_WIDE_INT w;
2588
2589     do_alg_addsub_t_m2:
2590       for (w = 1; (w & t) != 0; w <<= 1)
2591         ;
2592       /* If T was -1, then W will be zero after the loop.  This is another
2593          case where T ends with ...111.  Handling this with (T + 1) and
2594          subtract 1 produces slightly better code and results in algorithm
2595          selection much faster than treating it like the ...0111 case
2596          below.  */
2597       if (w == 0
2598           || (w > 2
2599               /* Reject the case where t is 3.
2600                  Thus we prefer addition in that case.  */
2601               && t != 3))
2602         {
2603           /* T ends with ...111.  Multiply by (T + 1) and subtract 1.  */
2604
2605           op_cost = add_cost (speed, mode);
2606           new_limit.cost = best_cost.cost - op_cost;
2607           new_limit.latency = best_cost.latency - op_cost;
2608           synth_mult (alg_in, t + 1, &new_limit, mode);
2609
2610           alg_in->cost.cost += op_cost;
2611           alg_in->cost.latency += op_cost;
2612           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2613             {
2614               struct algorithm *x;
2615               best_cost = alg_in->cost;
2616               x = alg_in, alg_in = best_alg, best_alg = x;
2617               best_alg->log[best_alg->ops] = 0;
2618               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2619             }
2620         }
2621       else
2622         {
2623           /* T ends with ...01 or ...011.  Multiply by (T - 1) and add 1.  */
2624
2625           op_cost = add_cost (speed, mode);
2626           new_limit.cost = best_cost.cost - op_cost;
2627           new_limit.latency = best_cost.latency - op_cost;
2628           synth_mult (alg_in, t - 1, &new_limit, mode);
2629
2630           alg_in->cost.cost += op_cost;
2631           alg_in->cost.latency += op_cost;
2632           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2633             {
2634               struct algorithm *x;
2635               best_cost = alg_in->cost;
2636               x = alg_in, alg_in = best_alg, best_alg = x;
2637               best_alg->log[best_alg->ops] = 0;
2638               best_alg->op[best_alg->ops] = alg_add_t_m2;
2639             }
2640         }
2641
2642       /* We may be able to calculate a * -7, a * -15, a * -31, etc
2643          quickly with a - a * n for some appropriate constant n.  */
2644       m = exact_log2 (-orig_t + 1);
2645       if (m >= 0 && m < maxm)
2646         {
2647           op_cost = shiftsub1_cost (speed, mode, m);
2648           new_limit.cost = best_cost.cost - op_cost;
2649           new_limit.latency = best_cost.latency - op_cost;
2650           synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2651                       &new_limit, mode);
2652
2653           alg_in->cost.cost += op_cost;
2654           alg_in->cost.latency += op_cost;
2655           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2656             {
2657               struct algorithm *x;
2658               best_cost = alg_in->cost;
2659               x = alg_in, alg_in = best_alg, best_alg = x;
2660               best_alg->log[best_alg->ops] = m;
2661               best_alg->op[best_alg->ops] = alg_sub_t_m2;
2662             }
2663         }
2664
2665       if (cache_hit)
2666         goto done;
2667     }
2668
2669   /* Look for factors of t of the form
2670      t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2671      If we find such a factor, we can multiply by t using an algorithm that
2672      multiplies by q, shift the result by m and add/subtract it to itself.
2673
2674      We search for large factors first and loop down, even if large factors
2675      are less probable than small; if we find a large factor we will find a
2676      good sequence quickly, and therefore be able to prune (by decreasing
2677      COST_LIMIT) the search.  */
2678
2679  do_alg_addsub_factor:
2680   for (m = floor_log2 (t - 1); m >= 2; m--)
2681     {
2682       unsigned HOST_WIDE_INT d;
2683
2684       d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2685       if (t % d == 0 && t > d && m < maxm
2686           && (!cache_hit || cache_alg == alg_add_factor))
2687         {
2688           /* If the target has a cheap shift-and-add instruction use
2689              that in preference to a shift insn followed by an add insn.
2690              Assume that the shift-and-add is "atomic" with a latency
2691              equal to its cost, otherwise assume that on superscalar
2692              hardware the shift may be executed concurrently with the
2693              earlier steps in the algorithm.  */
2694           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2695           if (shiftadd_cost (speed, mode, m) < op_cost)
2696             {
2697               op_cost = shiftadd_cost (speed, mode, m);
2698               op_latency = op_cost;
2699             }
2700           else
2701             op_latency = add_cost (speed, mode);
2702
2703           new_limit.cost = best_cost.cost - op_cost;
2704           new_limit.latency = best_cost.latency - op_latency;
2705           synth_mult (alg_in, t / d, &new_limit, mode);
2706
2707           alg_in->cost.cost += op_cost;
2708           alg_in->cost.latency += op_latency;
2709           if (alg_in->cost.latency < op_cost)
2710             alg_in->cost.latency = op_cost;
2711           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2712             {
2713               struct algorithm *x;
2714               best_cost = alg_in->cost;
2715               x = alg_in, alg_in = best_alg, best_alg = x;
2716               best_alg->log[best_alg->ops] = m;
2717               best_alg->op[best_alg->ops] = alg_add_factor;
2718             }
2719           /* Other factors will have been taken care of in the recursion.  */
2720           break;
2721         }
2722
2723       d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2724       if (t % d == 0 && t > d && m < maxm
2725           && (!cache_hit || cache_alg == alg_sub_factor))
2726         {
2727           /* If the target has a cheap shift-and-subtract insn use
2728              that in preference to a shift insn followed by a sub insn.
2729              Assume that the shift-and-sub is "atomic" with a latency
2730              equal to it's cost, otherwise assume that on superscalar
2731              hardware the shift may be executed concurrently with the
2732              earlier steps in the algorithm.  */
2733           op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2734           if (shiftsub0_cost (speed, mode, m) < op_cost)
2735             {
2736               op_cost = shiftsub0_cost (speed, mode, m);
2737               op_latency = op_cost;
2738             }
2739           else
2740             op_latency = add_cost (speed, mode);
2741
2742           new_limit.cost = best_cost.cost - op_cost;
2743           new_limit.latency = best_cost.latency - op_latency;
2744           synth_mult (alg_in, t / d, &new_limit, mode);
2745
2746           alg_in->cost.cost += op_cost;
2747           alg_in->cost.latency += op_latency;
2748           if (alg_in->cost.latency < op_cost)
2749             alg_in->cost.latency = op_cost;
2750           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2751             {
2752               struct algorithm *x;
2753               best_cost = alg_in->cost;
2754               x = alg_in, alg_in = best_alg, best_alg = x;
2755               best_alg->log[best_alg->ops] = m;
2756               best_alg->op[best_alg->ops] = alg_sub_factor;
2757             }
2758           break;
2759         }
2760     }
2761   if (cache_hit)
2762     goto done;
2763
2764   /* Try shift-and-add (load effective address) instructions,
2765      i.e. do a*3, a*5, a*9.  */
2766   if ((t & 1) != 0)
2767     {
2768     do_alg_add_t2_m:
2769       q = t - 1;
2770       q = q & -q;
2771       m = exact_log2 (q);
2772       if (m >= 0 && m < maxm)
2773         {
2774           op_cost = shiftadd_cost (speed, mode, m);
2775           new_limit.cost = best_cost.cost - op_cost;
2776           new_limit.latency = best_cost.latency - op_cost;
2777           synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2778
2779           alg_in->cost.cost += op_cost;
2780           alg_in->cost.latency += op_cost;
2781           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2782             {
2783               struct algorithm *x;
2784               best_cost = alg_in->cost;
2785               x = alg_in, alg_in = best_alg, best_alg = x;
2786               best_alg->log[best_alg->ops] = m;
2787               best_alg->op[best_alg->ops] = alg_add_t2_m;
2788             }
2789         }
2790       if (cache_hit)
2791         goto done;
2792
2793     do_alg_sub_t2_m:
2794       q = t + 1;
2795       q = q & -q;
2796       m = exact_log2 (q);
2797       if (m >= 0 && m < maxm)
2798         {
2799           op_cost = shiftsub0_cost (speed, mode, m);
2800           new_limit.cost = best_cost.cost - op_cost;
2801           new_limit.latency = best_cost.latency - op_cost;
2802           synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2803
2804           alg_in->cost.cost += op_cost;
2805           alg_in->cost.latency += op_cost;
2806           if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2807             {
2808               struct algorithm *x;
2809               best_cost = alg_in->cost;
2810               x = alg_in, alg_in = best_alg, best_alg = x;
2811               best_alg->log[best_alg->ops] = m;
2812               best_alg->op[best_alg->ops] = alg_sub_t2_m;
2813             }
2814         }
2815       if (cache_hit)
2816         goto done;
2817     }
2818
2819  done:
2820   /* If best_cost has not decreased, we have not found any algorithm.  */
2821   if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2822     {
2823       /* We failed to find an algorithm.  Record alg_impossible for
2824          this case (that is, <T, MODE, COST_LIMIT>) so that next time
2825          we are asked to find an algorithm for T within the same or
2826          lower COST_LIMIT, we can immediately return to the
2827          caller.  */
2828       entry_ptr->t = t;
2829       entry_ptr->mode = mode;
2830       entry_ptr->speed = speed;
2831       entry_ptr->alg = alg_impossible;
2832       entry_ptr->cost = *cost_limit;
2833       return;
2834     }
2835
2836   /* Cache the result.  */
2837   if (!cache_hit)
2838     {
2839       entry_ptr->t = t;
2840       entry_ptr->mode = mode;
2841       entry_ptr->speed = speed;
2842       entry_ptr->alg = best_alg->op[best_alg->ops];
2843       entry_ptr->cost.cost = best_cost.cost;
2844       entry_ptr->cost.latency = best_cost.latency;
2845     }
2846
2847   /* If we are getting a too long sequence for `struct algorithm'
2848      to record, make this search fail.  */
2849   if (best_alg->ops == MAX_BITS_PER_WORD)
2850     return;
2851
2852   /* Copy the algorithm from temporary space to the space at alg_out.
2853      We avoid using structure assignment because the majority of
2854      best_alg is normally undefined, and this is a critical function.  */
2855   alg_out->ops = best_alg->ops + 1;
2856   alg_out->cost = best_cost;
2857   memcpy (alg_out->op, best_alg->op,
2858           alg_out->ops * sizeof *alg_out->op);
2859   memcpy (alg_out->log, best_alg->log,
2860           alg_out->ops * sizeof *alg_out->log);
2861 }
2862 \f
2863 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2864    Try three variations:
2865
2866        - a shift/add sequence based on VAL itself
2867        - a shift/add sequence based on -VAL, followed by a negation
2868        - a shift/add sequence based on VAL - 1, followed by an addition.
2869
2870    Return true if the cheapest of these cost less than MULT_COST,
2871    describing the algorithm in *ALG and final fixup in *VARIANT.  */
2872
2873 static bool
2874 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2875                      struct algorithm *alg, enum mult_variant *variant,
2876                      int mult_cost)
2877 {
2878   struct algorithm alg2;
2879   struct mult_cost limit;
2880   int op_cost;
2881   bool speed = optimize_insn_for_speed_p ();
2882
2883   /* Fail quickly for impossible bounds.  */
2884   if (mult_cost < 0)
2885     return false;
2886
2887   /* Ensure that mult_cost provides a reasonable upper bound.
2888      Any constant multiplication can be performed with less
2889      than 2 * bits additions.  */
2890   op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2891   if (mult_cost > op_cost)
2892     mult_cost = op_cost;
2893
2894   *variant = basic_variant;
2895   limit.cost = mult_cost;
2896   limit.latency = mult_cost;
2897   synth_mult (alg, val, &limit, mode);
2898
2899   /* This works only if the inverted value actually fits in an
2900      `unsigned int' */
2901   if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2902     {
2903       op_cost = neg_cost(speed, mode);
2904       if (MULT_COST_LESS (&alg->cost, mult_cost))
2905         {
2906           limit.cost = alg->cost.cost - op_cost;
2907           limit.latency = alg->cost.latency - op_cost;
2908         }
2909       else
2910         {
2911           limit.cost = mult_cost - op_cost;
2912           limit.latency = mult_cost - op_cost;
2913         }
2914
2915       synth_mult (&alg2, -val, &limit, mode);
2916       alg2.cost.cost += op_cost;
2917       alg2.cost.latency += op_cost;
2918       if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2919         *alg = alg2, *variant = negate_variant;
2920     }
2921
2922   /* This proves very useful for division-by-constant.  */
2923   op_cost = add_cost (speed, mode);
2924   if (MULT_COST_LESS (&alg->cost, mult_cost))
2925     {
2926       limit.cost = alg->cost.cost - op_cost;
2927       limit.latency = alg->cost.latency - op_cost;
2928     }
2929   else
2930     {
2931       limit.cost = mult_cost - op_cost;
2932       limit.latency = mult_cost - op_cost;
2933     }
2934
2935   synth_mult (&alg2, val - 1, &limit, mode);
2936   alg2.cost.cost += op_cost;
2937   alg2.cost.latency += op_cost;
2938   if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2939     *alg = alg2, *variant = add_variant;
2940
2941   return MULT_COST_LESS (&alg->cost, mult_cost);
2942 }
2943
2944 /* A subroutine of expand_mult, used for constant multiplications.
2945    Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2946    convenient.  Use the shift/add sequence described by ALG and apply
2947    the final fixup specified by VARIANT.  */
2948
2949 static rtx
2950 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2951                    rtx target, const struct algorithm *alg,
2952                    enum mult_variant variant)
2953 {
2954   HOST_WIDE_INT val_so_far;
2955   rtx insn, accum, tem;
2956   int opno;
2957   enum machine_mode nmode;
2958
2959   /* Avoid referencing memory over and over and invalid sharing
2960      on SUBREGs.  */
2961   op0 = force_reg (mode, op0);
2962
2963   /* ACCUM starts out either as OP0 or as a zero, depending on
2964      the first operation.  */
2965
2966   if (alg->op[0] == alg_zero)
2967     {
2968       accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2969       val_so_far = 0;
2970     }
2971   else if (alg->op[0] == alg_m)
2972     {
2973       accum = copy_to_mode_reg (mode, op0);
2974       val_so_far = 1;
2975     }
2976   else
2977     gcc_unreachable ();
2978
2979   for (opno = 1; opno < alg->ops; opno++)
2980     {
2981       int log = alg->log[opno];
2982       rtx shift_subtarget = optimize ? 0 : accum;
2983       rtx add_target
2984         = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2985            && !optimize)
2986           ? target : 0;
2987       rtx accum_target = optimize ? 0 : accum;
2988       rtx accum_inner;
2989
2990       switch (alg->op[opno])
2991         {
2992         case alg_shift:
2993           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2994           /* REG_EQUAL note will be attached to the following insn.  */
2995           emit_move_insn (accum, tem);
2996           val_so_far <<= log;
2997           break;
2998
2999         case alg_add_t_m2:
3000           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3001           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3002                                  add_target ? add_target : accum_target);
3003           val_so_far += (HOST_WIDE_INT) 1 << log;
3004           break;
3005
3006         case alg_sub_t_m2:
3007           tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
3008           accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
3009                                  add_target ? add_target : accum_target);
3010           val_so_far -= (HOST_WIDE_INT) 1 << log;
3011           break;
3012
3013         case alg_add_t2_m:
3014           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3015                                 log, shift_subtarget, 0);
3016           accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
3017                                  add_target ? add_target : accum_target);
3018           val_so_far = (val_so_far << log) + 1;
3019           break;
3020
3021         case alg_sub_t2_m:
3022           accum = expand_shift (LSHIFT_EXPR, mode, accum,
3023                                 log, shift_subtarget, 0);
3024           accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
3025                                  add_target ? add_target : accum_target);
3026           val_so_far = (val_so_far << log) - 1;
3027           break;
3028
3029         case alg_add_factor:
3030           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3031           accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
3032                                  add_target ? add_target : accum_target);
3033           val_so_far += val_so_far << log;
3034           break;
3035
3036         case alg_sub_factor:
3037           tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
3038           accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
3039                                  (add_target
3040                                   ? add_target : (optimize ? 0 : tem)));
3041           val_so_far = (val_so_far << log) - val_so_far;
3042           break;
3043
3044         default:
3045           gcc_unreachable ();
3046         }
3047
3048       if (SCALAR_INT_MODE_P (mode))
3049         {
3050           /* Write a REG_EQUAL note on the last insn so that we can cse
3051              multiplication sequences.  Note that if ACCUM is a SUBREG,
3052              we've set the inner register and must properly indicate that.  */
3053           tem = op0, nmode = mode;
3054           accum_inner = accum;
3055           if (GET_CODE (accum) == SUBREG)
3056             {
3057               accum_inner = SUBREG_REG (accum);
3058               nmode = GET_MODE (accum_inner);
3059               tem = gen_lowpart (nmode, op0);
3060             }
3061
3062           insn = get_last_insn ();
3063           set_dst_reg_note (insn, REG_EQUAL,
3064                             gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
3065                             accum_inner);
3066         }
3067     }
3068
3069   if (variant == negate_variant)
3070     {
3071       val_so_far = -val_so_far;
3072       accum = expand_unop (mode, neg_optab, accum, target, 0);
3073     }
3074   else if (variant == add_variant)
3075     {
3076       val_so_far = val_so_far + 1;
3077       accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3078     }
3079
3080   /* Compare only the bits of val and val_so_far that are significant
3081      in the result mode, to avoid sign-/zero-extension confusion.  */
3082   nmode = GET_MODE_INNER (mode);
3083   if (nmode == VOIDmode)
3084     nmode = mode;
3085   val &= GET_MODE_MASK (nmode);
3086   val_so_far &= GET_MODE_MASK (nmode);
3087   gcc_assert (val == val_so_far);
3088
3089   return accum;
3090 }
3091
3092 /* Perform a multiplication and return an rtx for the result.
3093    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3094    TARGET is a suggestion for where to store the result (an rtx).
3095    UNSIGNEDP is forwarded to the expanders; when it is zero, flag_trapv is
3096    set and MODE is a scalar integer mode, the trapping optabs are used.
3097    A constant OP0 is swapped into OP1 first.  A constant OP1 (or a vector of
3098    identical constants) is tried as a negation, shift or shift/add sequence.  */
3099
3100 rtx
3101 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3102              int unsignedp)
3103 {
3104   enum mult_variant variant;
3105   struct algorithm algorithm;
3106   rtx scalar_op1;
3107   int max_cost;
3108   bool speed = optimize_insn_for_speed_p ();
3109   bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3110
3111   if (CONSTANT_P (op0))
3112     {
3113       rtx temp = op0;
3114       op0 = op1;
3115       op1 = temp;
3116     }
3117
3118   /* For vectors, there are several simplifications that can be made if
3119      all elements of the vector constant are identical.  */
3120   scalar_op1 = op1;
3121   if (GET_CODE (op1) == CONST_VECTOR)
3122     {
3123       int i, n = CONST_VECTOR_NUNITS (op1);
3124       scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3125       for (i = 1; i < n; ++i)
3126         if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3127           goto skip_scalar;  /* Elements differ: no scalar shortcut.  */
3128     }
3129
3130   if (INTEGRAL_MODE_P (mode))
3131     {
3132       rtx fake_reg;
3133       HOST_WIDE_INT coeff;
3134       bool is_neg;
3135       int mode_bitsize;
3136
3137       if (op1 == CONST0_RTX (mode))
3138         return op1;  /* x * 0 is 0.  */
3139       if (op1 == CONST1_RTX (mode))
3140         return op0;  /* x * 1 is x.  */
3141       if (op1 == CONSTM1_RTX (mode))  /* x * -1 is -x.  */
3142         return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3143                             op0, target, 0);
3144
3145       if (do_trapv)
3146         goto skip_synth;  /* NOTE(review): presumably shift/add cannot trap.  */
3147
3148       /* These are the operations that are potentially turned into
3149          a sequence of shifts and additions.  */
3150       mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3151
3152       /* synth_mult does an `unsigned int' multiply.  As long as the mode is
3153          less than or equal in size to `unsigned int' this doesn't matter.
3154          If the mode is larger than `unsigned int', then synth_mult works
3155          only if the constant value exactly fits in an `unsigned int' without
3156          any truncation.  This means that multiplying by negative values does
3157          not work; results are off by 2^32 on a 32 bit machine.  */
3158
3159       if (CONST_INT_P (scalar_op1))
3160         {
3161           coeff = INTVAL (scalar_op1);
3162           is_neg = coeff < 0;
3163         }
3164       else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3165         {
3166           /* If we are multiplying in DImode, it may still be a win
3167              to try to work with shifts and adds.  */
3168           if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3169               && CONST_DOUBLE_LOW (scalar_op1) > 0)
3170             {
3171               coeff = CONST_DOUBLE_LOW (scalar_op1);
3172               is_neg = false;
3173             }
3174           else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3175             {
3176               coeff = CONST_DOUBLE_HIGH (scalar_op1);
3177               if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3178                 {
3179                   int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3180                   if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3181                       || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3182                     return expand_shift (LSHIFT_EXPR, mode, op0,
3183                                          shift, target, unsignedp);
3184                 }
3185               goto skip_synth;
3186             }
3187           else
3188             goto skip_synth;
3189         }
3190       else
3191         goto skip_synth;
3192
3193       /* We used to test optimize here, on the grounds that it's better to
3194          produce a smaller program when -O is not used.  But this causes
3195          such a terrible slowdown sometimes that it seems better to always
3196          use synth_mult.  */
3197
3198       /* Special case powers of two.  */
3199       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3200         return expand_shift (LSHIFT_EXPR, mode, op0,
3201                              floor_log2 (coeff), target, unsignedp);
3202
3203       fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3204
3205       /* Attempt to handle multiplication of DImode values by negative
3206          coefficients, by performing the multiplication by a positive
3207          multiplier and then inverting the result.  */
3208       if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3209         {
3210           /* It's safe to use -coeff even for INT_MIN, as the
3211              result is interpreted as an unsigned coefficient.
3212              Exclude cost of op0 from max_cost to match the cost
3213              calculation of the synth_mult.  */
3214           max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3215                       - neg_cost(speed, mode));
3216           if (max_cost > 0
3217               && choose_mult_variant (mode, -coeff, &algorithm,
3218                                       &variant, max_cost))
3219             {
3220               rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3221                                             &algorithm, variant);
3222               return expand_unop (mode, neg_optab, temp, target, 0);
3223             }
3224           goto skip_synth;
3225         }
3226
3227       /* Exclude cost of op0 from max_cost to match the cost
3228          calculation of the synth_mult.  */
3229       max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3230       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3231         return expand_mult_const (mode, op0, coeff, target,
3232                                   &algorithm, variant);
3233     }
3234  skip_synth:
3235
3236   /* Expand x*2.0 as x+x.  */
3237   if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3238     {
3239       REAL_VALUE_TYPE d;
3240       REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3241
3242       if (REAL_VALUES_EQUAL (d, dconst2))
3243         {
3244           op0 = force_reg (GET_MODE (op0), op0);
3245           return expand_binop (mode, add_optab, op0, op0,
3246                                target, unsignedp, OPTAB_LIB_WIDEN);
3247         }
3248     }
3249  skip_scalar:
3250
3251   /* This used to use umul_optab if unsigned, but for non-widening multiply
3252      there is no difference between signed and unsigned.  */
3253   op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3254                       op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3255   gcc_assert (op0);
3256   return op0;
3257 }
3258
3259 /* Estimate the cost of multiplying a register by COEFF in MODE, tuned for
3260    SPEED: the shift/add synthesis cost if one is found, else the MULT cost.  */
3261
3262 int
3263 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3264 {
3265   struct algorithm alg;
3266   enum mult_variant alg_variant;
3267   /* A scratch register standing in for both operands of the MULT.  */
3268   rtx pseudo = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3269   int mult_insn_cost
3270     = set_src_cost (gen_rtx_MULT (mode, pseudo, pseudo), speed);
3271
3272   if (!choose_mult_variant (mode, coeff, &alg, &alg_variant, mult_insn_cost))
3273     return mult_insn_cost;
3274   return alg.cost.cost;
3275 }
3276
3277 /* Perform a widening multiplication and return an rtx for the result.
3278    MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3279    TARGET is a suggestion for where to store the result (an rtx).
3280    THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3281    or smul_widen_optab.  UNSIGNEDP is forwarded to the expanders used.
3282
3283    A constant integer OP1 is special-cased: zero and powers of two directly,
3284    otherwise by shifts and adds when those cost less than the multiply.  */
3285
3286 rtx
3287 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3288                       int unsignedp, optab this_optab)
3289 {
3290   bool speed = optimize_insn_for_speed_p ();
3291   rtx cop1;
3292
3293   if (CONST_INT_P (op1)
3294       && GET_MODE (op0) != VOIDmode
3295       && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3296                                 this_optab == umul_widen_optab))
3297       && CONST_INT_P (cop1)
3298       && (INTVAL (cop1) >= 0 || HWI_COMPUTABLE_MODE_P (mode)))
3299     {
3300       HOST_WIDE_INT coeff = INTVAL (cop1);
3301       int max_cost;
3302       enum mult_variant variant;
3303       struct algorithm algorithm;
3304
3305       /* Special case zero and powers of two.  Zero must be caught first:
3306          floor_log2 (0) is -1, which is not a valid shift count.  */
3307       if (coeff == 0)
3308         return CONST0_RTX (mode);
3309       if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3310         {
3311           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3312           return expand_shift (LSHIFT_EXPR, mode, op0,
3313                                floor_log2 (coeff), target, unsignedp);
3314         }
3315
3316       /* Exclude cost of op0 from max_cost to match the cost
3317          calculation of the synth_mult.  */
3318       max_cost = mul_widen_cost (speed, mode);
3319       if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3320         {
3321           op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3322           return expand_mult_const (mode, op0, coeff, target,
3323                                     &algorithm, variant);
3324         }
3325     }
3326   return expand_binop (mode, this_optab, op0, op1, target,
3327                        unsignedp, OPTAB_LIB_WIDEN);
3328 }
3329 \f
3330 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3331    replace division by D, and put the least significant N bits of the result
3332    in *MULTIPLIER_PTR and return the most significant bit.
3333
3334    The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3335    needed precision is in PRECISION (should be <= N).
3336
3337    PRECISION should be as small as possible so this function can choose
3338    the multiplier more freely.
3339
3340    The rounded-up logarithm of D is placed in *LGUP_PTR.  A shift count that
3341    is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3342
3343    Using this function, x/D will be equal to (x * m) >> (N + *POST_SHIFT_PTR),
3344    where m ~= 2^(N + *POST_SHIFT_PTR) / D is the full N + 1 bit multiplier.  */
3345
3346 unsigned HOST_WIDE_INT
3347 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3348                    unsigned HOST_WIDE_INT *multiplier_ptr,
3349                    int *post_shift_ptr, int *lgup_ptr)
3350 {
3351   double_int mhigh, mlow;
3352   int lgup, post_shift;
3353   int pow, pow2;
3354
3355   /* lgup = ceil(log2(divisor)); */
3356   lgup = ceil_log2 (d);
3357
3358   gcc_assert (lgup <= n);
3359
3360   pow = n + lgup;
3361   pow2 = n + lgup - precision;
3362
3363   /* We could handle this with some effort, but this case is much
3364      better handled directly with a scc insn, so rely on caller using
3365      that.  */
3366   gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3367
3368   /* mlow = 2^(N + lgup)/d */
3369   double_int val = double_int_zero.set_bit (pow);
3370   mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3371
3372   /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3373   val |= double_int_zero.set_bit (pow2);
3374   mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3375
3376   gcc_assert (!mhigh.high || val.high - d < d);
3377   gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3378   /* Assert that mlow < mhigh.  */
3379   gcc_assert (mlow.ult (mhigh));
3380
3381   /* If precision == N, then mlow, mhigh exceed 2^N
3382      (but they do not exceed 2^(N+1)).  */
3383
3384   /* Reduce to lowest terms: halve both bounds while they stay distinct.  */
3385   for (post_shift = lgup; post_shift > 0; post_shift--)
3386     {
3387       int shft = HOST_BITS_PER_WIDE_INT - 1;
3388       unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);  /* mlow >> 1 */
3389       unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);  /* mhigh >> 1 */
3390       if (ml_lo >= mh_lo)
3391         break;
3392
3393       mlow = double_int::from_uhwi (ml_lo);
3394       mhigh = double_int::from_uhwi (mh_lo);
3395     }
3396
3397   *post_shift_ptr = post_shift;
3398   *lgup_ptr = lgup;
3399   if (n < HOST_BITS_PER_WIDE_INT)
3400     {
3401       unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3402       *multiplier_ptr = mhigh.low & mask;
3403       return mhigh.low >= mask;  /* NOTE(review): compares with >=, not >.  */
3404     }
3405   else
3406     {
3407       *multiplier_ptr = mhigh.low;
3408       return mhigh.high;  /* With N == HWI width the top bit is the high word.  */
3409     }
3410 }
3411
3412 /* Return the multiplicative inverse of X modulo 2**N, i.e. the Y for which
3413    X * Y is congruent to 1 (mod 2**N).  X is assumed to be odd.  */
3414
3415 static unsigned HOST_WIDE_INT
3416 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3417 {
3418   const unsigned HOST_WIDE_INT all_ones = ~(unsigned HOST_WIDE_INT) 0;
3419   unsigned HOST_WIDE_INT low_n_bits, inverse;
3420   int good_bits;
3421
3422   /* Build the mask of the N low bits; a full-width N needs no shift.  */
3423   if (n == HOST_BITS_PER_WIDE_INT)
3424     low_n_bits = all_ones;
3425   else
3426     low_n_bits = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3427
3428   /* Since X is odd, X itself satisfies X * Y == 1 mod 2**3.  Each step
3429      below doubles the number of significant low bits in the inverse.  */
3430   inverse = x;
3431   for (good_bits = 3; good_bits < n; good_bits *= 2)
3432     {
3433       unsigned HOST_WIDE_INT residual = 2 - x * inverse;
3434
3435       inverse = (inverse * residual) & low_n_bits;   /* Modulo 2**N.  */
3436     }
3437   return inverse;
3438 }
3439
3440 /* Fix up ADJ_OPERAND, the high half of the product OP0 x OP1, when that
3441    product was computed with a multiply of the wrong signedness.  With
3442    UNSIGNEDP nonzero a signed high part is turned into the unsigned one;
3443    with UNSIGNEDP zero an unsigned high part is turned into the signed one.
3444    The correction applies PLUS (UNSIGNEDP) or MINUS (!UNSIGNEDP) twice, once
3445    per operand, each time with the other operand masked by a sign shift.
3446
3447    MODE is the mode of operation.  The result is put in TARGET if that is
3448    convenient, and the rtx holding it is returned.  */
3449
3450 rtx
3451 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3452                              rtx op1, rtx target, int unsignedp)
3453 {
3454   const int sign_bit_pos = GET_MODE_BITSIZE (mode) - 1;
3455   const enum rtx_code fixup_code = unsignedp ? PLUS : MINUS;
3456   rtx masked, fixup;
3457
3458   /* Shift OP0 right by all but one bit, then AND the result with OP1.  */
3459   masked = expand_shift (RSHIFT_EXPR, mode, op0, sign_bit_pos, NULL_RTX, 0);
3460   masked = expand_and (mode, masked, op1, NULL_RTX);
3461   fixup = gen_rtx_fmt_ee (fixup_code, mode, adj_operand, masked);
3462   adj_operand = force_operand (fixup, adj_operand);
3463
3464   /* Likewise with the roles of OP0 and OP1 exchanged.  */
3465   masked = expand_shift (RSHIFT_EXPR, mode, op1, sign_bit_pos, NULL_RTX, 0);
3466   masked = expand_and (mode, masked, op0, NULL_RTX);
3467   fixup = gen_rtx_fmt_ee (fixup_code, mode, adj_operand, masked);
3468   target = force_operand (fixup, target);
3469
3470   return target;
3471 }
3472
3473 /* Subroutine of expmed_mult_highpart.  Return the MODE high part of OP: via
3474    gen_highpart for word_mode, else by a right shift in the next wider mode.  */
3475
3476 static rtx
3477 extract_high_half (enum machine_mode mode, rtx op)
3478 {
3479   if (mode != word_mode)
3480     {
3481       enum machine_mode wide_mode;
3482       gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3483       wide_mode = GET_MODE_WIDER_MODE (mode);
3484       op = expand_shift (RSHIFT_EXPR, wide_mode, op,
3485                          GET_MODE_BITSIZE (mode), 0, 1);
3486       return convert_modes (mode, wide_mode, op, 0);
3487     }
3488   return gen_highpart (mode, op);
3489 }
3490
3491 /* Like expmed_mult_highpart, but only consider using a multiplication
3492    optab.  OP1 is an rtx for the constant operand.  Return 0 on failure.  */
3493
3494 static rtx
3495 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3496                             rtx target, int unsignedp, int max_cost)
3497 {
3498   rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3499   enum machine_mode wider_mode;
3500   optab moptab;
3501   rtx tem;
3502   int size;
3503   bool speed = optimize_insn_for_speed_p ();
3504
3505   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3506
3507   wider_mode = GET_MODE_WIDER_MODE (mode);
3508   size = GET_MODE_BITSIZE (mode);
3509
3510   /* Firstly, try using a multiplication insn that only generates the needed
3511      high part of the product, and in the sign flavor of unsignedp.  */
3512   if (mul_highpart_cost (speed, mode) < max_cost)
3513     {
3514       moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3515       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3516                           unsignedp, OPTAB_DIRECT);
3517       if (tem)
3518         return tem;
3519     }
3520
3521   /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3522      Need to adjust the result after the multiplication; the cost test
3523      budgets two shifts and four adds for that adjustment.  */
3524   if (size - 1 < BITS_PER_WORD
3525       && (mul_highpart_cost (speed, mode)
3526           + 2 * shift_cost (speed, mode, size-1)
3527           + 4 * add_cost (speed, mode) < max_cost))
3528     {
3529       moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3530       tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3531                           unsignedp, OPTAB_DIRECT);
3532       if (tem)
3533         /* We used the wrong signedness.  Adjust the result.  */
3534         return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3535                                             tem, unsignedp);
3536     }
3537   /* Try widening multiplication.  */
3538   moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3539   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3540       && mul_widen_cost (speed, wider_mode) < max_cost)
3541     {
3542       tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3543                           unsignedp, OPTAB_WIDEN);
3544       if (tem)
3545         return extract_high_half (mode, tem);
3546     }
3547
3548   /* Try widening the mode and perform a non-widening multiplication.  */
3549   if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3550       && size - 1 < BITS_PER_WORD
3551       && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3552           < max_cost))
3553     {
3554       rtx insns, wop0, wop1;
3555
3556       /* We need to widen the operands, for example to ensure the
3557          constant multiplier is correctly sign or zero extended.
3558          Use a sequence to clean-up any instructions emitted by
3559          the conversions if things don't work out.  */
3560       start_sequence ();
3561       wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3562       wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3563       tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3564                           unsignedp, OPTAB_WIDEN);
3565       insns = get_insns ();
3566       end_sequence ();
3567
3568       if (tem)
3569         {
3570           emit_insn (insns);
3571           return extract_high_half (mode, tem);
3572         }
3573     }
3574
3575   /* Try widening multiplication of opposite signedness, and adjust.  */
3576   moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3577   if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3578       && size - 1 < BITS_PER_WORD
3579       && (mul_widen_cost (speed, wider_mode)
3580           + 2 * shift_cost (speed, mode, size-1)
3581           + 4 * add_cost (speed, mode) < max_cost))
3582     {
3583       tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3584                           NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3585       if (tem != 0)
3586         {
3587           tem = extract_high_half (mode, tem);
3588           /* We used the wrong signedness.  Adjust the result.  */
3589           return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3590                                               target, unsignedp);
3591         }
3592     }
3593
3594   return 0;  /* Nothing was both available and cheaper than MAX_COST.  */
3595 }
3596
3597 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3598    putting the high half of the result in TARGET if that is convenient,
3599    and return where the result is.  If the operation cannot be performed,
3600    0 is returned.  A shift/add sequence computed in the next wider mode is
3601    weighed against the multiplication optabs.
3602    MODE is the mode of operation and result.
3603
3604    UNSIGNEDP nonzero means unsigned multiply.
3605
3606    MAX_COST is the total allowed cost for the expanded RTL.  */
3607
3608 static rtx
3609 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3610                       rtx target, int unsignedp, int max_cost)
3611 {
3612   enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3613   unsigned HOST_WIDE_INT cnst1;
3614   int extra_cost;
3615   bool sign_adjust = false;
3616   enum mult_variant variant;
3617   struct algorithm alg;
3618   rtx tem;
3619   bool speed = optimize_insn_for_speed_p ();
3620
3621   gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3622   /* Only HWI_COMPUTABLE_MODE_P modes are supported: CNST1 is kept in a HWI.  */
3623   gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3624
3625   cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);  /* OP1's bits within MODE.  */
3626
3627   /* We can't optimize modes wider than BITS_PER_WORD.
3628      ??? We might be able to perform double-word arithmetic if
3629      mode == word_mode, however all the cost calculations in
3630      synth_mult etc. assume single-word operations.  */
3631   if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3632     return expmed_mult_highpart_optab (mode, op0, op1, target,
3633                                        unsignedp, max_cost);
3634
3635   extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3636
3637   /* Check whether we try to multiply by a negative constant.  */
3638   if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3639     {
3640       sign_adjust = true;
3641       extra_cost += add_cost (speed, mode);
3642     }
3643
3644   /* See whether shift/add multiplication is cheap enough.  */
3645   if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3646                            max_cost - extra_cost))
3647     {
3648       /* See whether the specialized multiplication optabs are
3649          cheaper than the shift/add version.  */
3650       tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3651                                         alg.cost.cost + extra_cost);
3652       if (tem)
3653         return tem;
3654
3655       tem = convert_to_mode (wider_mode, op0, unsignedp);
3656       tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3657       tem = extract_high_half (mode, tem);
3658
3659       /* A negative constant was multiplied as unsigned: subtract OP0.  */
3660       if (sign_adjust)
3661         tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3662
3663       return tem;
3664     }
3665   return expmed_mult_highpart_optab (mode, op0, op1, target,
3666                                      unsignedp, max_cost);
3667 }
3668
3669
3670 /* Expand signed modulus of OP0 by a power of two D in mode MODE and return
3671    an rtx for the remainder (zero or negative when OP0 is negative).  Uses a
3672    branch-free sequence when branches are costly, else a compare and jump.  */
3673
3674 static rtx
3675 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3676 {
3677   unsigned HOST_WIDE_INT masklow, maskhigh;
3678   rtx result, temp, shift, label;
3679   int logd;
3680
3681   logd = floor_log2 (d);
3682   result = gen_reg_rtx (mode);
3683
3684   /* Avoid conditional branches when they're expensive.  */
3685   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3686       && optimize_insn_for_speed_p ())
3687     {
3688       rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3689                                       mode, 0, -1);
3690       if (signmask)
3691         {
3692           signmask = force_reg (mode, signmask);
3693           masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3694           shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3695
3696           /* Use the rtx_cost of a LSHIFTRT instruction to determine
3697              which instruction sequence to use.  If logical right shifts
3698              are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3699              use a LSHIFTRT, 1 ADD, 1 SUB and an AND.  */
3700           temp = gen_rtx_LSHIFTRT (mode, result, shift);
3701           if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3702               || (set_src_cost (temp, optimize_insn_for_speed_p ())
3703                   > COSTS_N_INSNS (2)))
3704             {
3705               temp = expand_binop (mode, xor_optab, op0, signmask,
3706                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3707               temp = expand_binop (mode, sub_optab, temp, signmask,
3708                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3709               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3710                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3711               temp = expand_binop (mode, xor_optab, temp, signmask,
3712                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3713               temp = expand_binop (mode, sub_optab, temp, signmask,
3714                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3715             }
3716           else
3717             {
3718               signmask = expand_binop (mode, lshr_optab, signmask, shift,
3719                                        NULL_RTX, 1, OPTAB_LIB_WIDEN);
3720               signmask = force_reg (mode, signmask);
3721
3722               temp = expand_binop (mode, add_optab, op0, signmask,
3723                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3724               temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3725                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3726               temp = expand_binop (mode, sub_optab, temp, signmask,
3727                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
3728             }
3729           return temp;
3730         }
3731     }
3732
3733   /* Mask contains the mode's signbit and the significant bits of the
3734      modulus; including the signbit lets many targets avoid an explicit
3735      compare in the following comparison against zero.  The all-ones values
3736      are shifted as unsigned: left-shifting a negative value is undefined.  */
3737   masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3738   if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3739     {
3740       masklow |= (unsigned HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3741       maskhigh = -1;
3742     }
3743   else
3744     maskhigh = (unsigned HOST_WIDE_INT) -1
3745                  << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3746
3747   temp = expand_binop (mode, and_optab, op0,
3748                        immed_double_const (masklow, maskhigh, mode),
3749                        result, 1, OPTAB_LIB_WIDEN);
3750   if (temp != result)
3751     emit_move_insn (result, temp);
3752
3753   label = gen_label_rtx ();
3754   do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3755
3756   temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3757                        0, OPTAB_LIB_WIDEN);
3758   masklow = (unsigned HOST_WIDE_INT) -1 << logd;  /* Bits at and above D.  */
3759   maskhigh = -1;
3760   temp = expand_binop (mode, ior_optab, temp,
3761                        immed_double_const (masklow, maskhigh, mode),
3762                        result, 1, OPTAB_LIB_WIDEN);
3763   temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3764                        0, OPTAB_LIB_WIDEN);
3765   if (temp != result)
3766     emit_move_insn (result, temp);
3767   emit_label (label);
3768   return result;
3769 }
3770
3771 /* Expand signed division of OP0 by a power of two D in mode MODE.
3772    This routine is only called for positive values of D.  Branch-free
3773    sequences are tried first when BRANCH_COST warrants them; one that cannot
3774    be expanded (a helper returns 0) falls through to the next strategy.  */
3775
3776 static rtx
3777 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3778 {
3779   rtx temp, label;
3780   int logd = floor_log2 (d);
3781
3782   if (d == 2 && BRANCH_COST (optimize_insn_for_speed_p (), false) >= 1)
3783     {
3784       temp = gen_reg_rtx (mode);
3785       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3786       if (temp != NULL_RTX)  /* The store-flag may not be expandable.  */
3787         {
3788           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3789                                0, OPTAB_LIB_WIDEN);
3790           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3791         }
3792     }
3793
3794 #ifdef HAVE_conditional_move
3795   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3796     {
3797       rtx temp2;
3798
3799       /* ??? emit_conditional_move forces a stack adjustment via
3800          compare_from_rtx so, if the sequence is discarded, it will
3801          be lost.  Do it now instead.  */
3802       do_pending_stack_adjust ();
3803       start_sequence ();
3804       temp2 = copy_to_mode_reg (mode, op0);
3805       temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3806                            NULL_RTX, 0, OPTAB_LIB_WIDEN);
3807       temp = force_reg (mode, temp);
3808
3809       /* Construct "temp2 = (temp2 < 0) ? temp : temp2".  */
3810       temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3811                                      mode, temp, temp2, mode, 0);
3812       if (temp2)
3813         {
3814           rtx seq = get_insns ();
3815           end_sequence ();
3816           emit_insn (seq);
3817           return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3818         }
3819       end_sequence ();
3820     }
3821 #endif
3822
3823   if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2)
3824     {
3825       int ushift = GET_MODE_BITSIZE (mode) - logd;
3826
3827       temp = gen_reg_rtx (mode);
3828       temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3829       if (temp != NULL_RTX)  /* Otherwise use the branching fallback.  */
3830         {
3831           if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3832               > COSTS_N_INSNS (1))
3833             temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3834                                  NULL_RTX, 0, OPTAB_LIB_WIDEN);
3835           else
3836             temp = expand_shift (RSHIFT_EXPR, mode, temp, ushift, NULL_RTX, 1);
3837           temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3838                                0, OPTAB_LIB_WIDEN);
3839           return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3840         }
3841     }
3842
3843   label = gen_label_rtx ();
3844   temp = copy_to_mode_reg (mode, op0);
3845   do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3846   expand_inc (temp, GEN_INT (d - 1));
3847   emit_label (label);
3848   return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3849 }
3850 \f
3851 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3852    if that is convenient, and returning where the result is.
3853    You may request either the quotient or the remainder as the result;
3854    specify REM_FLAG nonzero to get the remainder.
3855
3856    CODE is the expression code for which kind of division this is;
3857    it controls how rounding is done.  MODE is the machine mode to use.
3858    UNSIGNEDP nonzero means do unsigned division.  */
3859
3860 /* ??? For CEIL_MOD_EXPR, we can compute an incorrect remainder with ANDI
3861    and then correct it by or'ing in the missing high bits
3862    if the result of the ANDI is nonzero.
3863    For ROUND_MOD_EXPR, we can use ANDI and then sign-extend the result.
3864    This could optimize to a bfexts instruction.
3865    But C doesn't use these operations, so their optimizations are
3866    left for later.  */
3867 /* ??? For modulo, we don't actually need the highpart of the first product,
3868    the low part will do nicely.  And for small divisors, the second multiply
3869    can also be a low-part only multiply or even be completely left out.
3870    E.g. to calculate the remainder of a division by 3 with a 32 bit
3871    multiply, multiply with 0x55555556 and extract the upper two bits;
3872    the result is exact for inputs up to 0x1fffffff.
3873    The input range can be reduced by using cross-sum rules.
3874    For odd divisors >= 3, the following table gives right shift counts
3875    so that if a number is shifted by an integer multiple of the given
3876    amount, the remainder stays the same:
3877    2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3878    14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3879    0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3880    20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3881    0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3882
3883    Cross-sum rules for even numbers can be derived by leaving as many bits
3884    to the right alone as the divisor has zeros to the right.
3885    E.g. if x is an unsigned 32 bit number:
3886    (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3887    */
3888
3889 rtx
3890 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3891                rtx op0, rtx op1, rtx target, int unsignedp)
3892 {
3893   enum machine_mode compute_mode;
3894   rtx tquotient;
3895   rtx quotient = 0, remainder = 0;
3896   rtx last;
3897   int size;
3898   rtx insn;
3899   optab optab1, optab2;
3900   int op1_is_constant, op1_is_pow2 = 0;
3901   int max_cost, extra_cost;
3902   static HOST_WIDE_INT last_div_const = 0;
3903   static HOST_WIDE_INT ext_op1;
3904   bool speed = optimize_insn_for_speed_p ();
3905
3906   op1_is_constant = CONST_INT_P (op1);
3907   if (op1_is_constant)
3908     {
3909       ext_op1 = INTVAL (op1);
3910       if (unsignedp)
3911         ext_op1 &= GET_MODE_MASK (mode);
3912       op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3913                      || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3914     }
3915
3916   /*
3917      This is the structure of expand_divmod:
3918
3919      First comes code to fix up the operands so we can perform the operations
3920      correctly and efficiently.
3921
3922      Second comes a switch statement with code specific for each rounding mode.
3923      For some special operands this code emits all RTL for the desired
3924      operation, for other cases, it generates only a quotient and stores it in
3925      QUOTIENT.  The case for trunc division/remainder might leave quotient = 0,
3926      to indicate that it has not done anything.
3927
3928      Last comes code that finishes the operation.  If QUOTIENT is set and
3929      REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1.  If
3930      QUOTIENT is not set, it is computed using trunc rounding.
3931
3932      We try to generate special code for division and remainder when OP1 is a
3933      constant.  If |OP1| = 2**n we can use shifts and some other fast
3934      operations.  For other values of OP1, we compute a carefully selected
3935      fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3936      by m.
3937
3938      In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3939      half of the product.  Different strategies for generating the product are
3940      implemented in expmed_mult_highpart.
3941
3942      If what we actually want is the remainder, we generate that by another
3943      by-constant multiplication and a subtraction.  */
3944
3945   /* We shouldn't be called with OP1 == const1_rtx, but some of the
3946      code below will malfunction if we are, so check here and handle
3947      the special case if so.  */
3948   if (op1 == const1_rtx)
3949     return rem_flag ? const0_rtx : op0;
3950
3951     /* When dividing by -1, we could get an overflow.
3952      negv_optab can handle overflows.  */
3953   if (! unsignedp && op1 == constm1_rtx)
3954     {
3955       if (rem_flag)
3956         return const0_rtx;
3957       return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3958                           ? negv_optab : neg_optab, op0, target, 0);
3959     }
3960
3961   if (target
3962       /* Don't use the function value register as a target
3963          since we have to read it as well as write it,
3964          and function-inlining gets confused by this.  */
3965       && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3966           /* Don't clobber an operand while doing a multi-step calculation.  */
3967           || ((rem_flag || op1_is_constant)
3968               && (reg_mentioned_p (target, op0)
3969                   || (MEM_P (op0) && MEM_P (target))))
3970           || reg_mentioned_p (target, op1)
3971           || (MEM_P (op1) && MEM_P (target))))
3972     target = 0;
3973
3974   /* Get the mode in which to perform this computation.  Normally it will
3975      be MODE, but sometimes we can't do the desired operation in MODE.
3976      If so, pick a wider mode in which we can do the operation.  Convert
3977      to that mode at the start to avoid repeated conversions.
3978
3979      First see what operations we need.  These depend on the expression
3980      we are evaluating.  (We assume that divxx3 insns exist under the
3981      same conditions as modxx3 insns and that these insns don't normally
3982      fail.  If these assumptions are not correct, we may generate less
3983      efficient code in some cases.)
3984
3985      Then see if we find a mode in which we can open-code that operation
3986      (either a division, modulus, or shift).  Finally, check for the smallest
3987      mode for which we can do the operation with a library call.  */
3988
3989   /* We might want to refine this now that we have division-by-constant
3990      optimization.  Since expmed_mult_highpart tries so many variants, it is
3991      not straightforward to generalize this.  Maybe we should make an array
3992      of possible modes in init_expmed?  Save this for GCC 2.7.  */
3993
3994   optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3995             ? (unsignedp ? lshr_optab : ashr_optab)
3996             : (unsignedp ? udiv_optab : sdiv_optab));
3997   optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3998             ? optab1
3999             : (unsignedp ? udivmod_optab : sdivmod_optab));
4000
4001   for (compute_mode = mode; compute_mode != VOIDmode;
4002        compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4003     if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
4004         || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
4005       break;
4006
4007   if (compute_mode == VOIDmode)
4008     for (compute_mode = mode; compute_mode != VOIDmode;
4009          compute_mode = GET_MODE_WIDER_MODE (compute_mode))
4010       if (optab_libfunc (optab1, compute_mode)
4011           || optab_libfunc (optab2, compute_mode))
4012         break;
4013
4014   /* If we still couldn't find a mode, use MODE, but expand_binop will
4015      probably die.  */
4016   if (compute_mode == VOIDmode)
4017     compute_mode = mode;
4018
4019   if (target && GET_MODE (target) == compute_mode)
4020     tquotient = target;
4021   else
4022     tquotient = gen_reg_rtx (compute_mode);
4023
4024   size = GET_MODE_BITSIZE (compute_mode);
4025 #if 0
4026   /* It should be possible to restrict the precision to GET_MODE_BITSIZE
4027      (mode), and thereby get better code when OP1 is a constant.  Do that
4028      later.  It will require going over all usages of SIZE below.  */
4029   size = GET_MODE_BITSIZE (mode);
4030 #endif
4031
4032   /* Only deduct something for a REM if the last divide done was
4033      for a different constant.   Then set the constant of the last
4034      divide.  */
4035   max_cost = (unsignedp
4036               ? udiv_cost (speed, compute_mode)
4037               : sdiv_cost (speed, compute_mode));
4038   if (rem_flag && ! (last_div_const != 0 && op1_is_constant
4039                      && INTVAL (op1) == last_div_const))
4040     max_cost -= (mul_cost (speed, compute_mode)
4041                  + add_cost (speed, compute_mode));
4042
4043   last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
4044
4045   /* Now convert to the best mode to use.  */
4046   if (compute_mode != mode)
4047     {
4048       op0 = convert_modes (compute_mode, mode, op0, unsignedp);
4049       op1 = convert_modes (compute_mode, mode, op1, unsignedp);
4050
4051       /* convert_modes may have placed op1 into a register, so we
4052          must recompute the following.  */
4053       op1_is_constant = CONST_INT_P (op1);
4054       op1_is_pow2 = (op1_is_constant
4055                      && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4056                           || (! unsignedp
4057                               && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
4058     }
4059
4060   /* If one of the operands is a volatile MEM, copy it into a register.  */
4061
4062   if (MEM_P (op0) && MEM_VOLATILE_P (op0))
4063     op0 = force_reg (compute_mode, op0);
4064   if (MEM_P (op1) && MEM_VOLATILE_P (op1))
4065     op1 = force_reg (compute_mode, op1);
4066
4067   /* If we need the remainder or if OP1 is constant, we need to
4068      put OP0 in a register in case it has any queued subexpressions.  */
4069   if (rem_flag || op1_is_constant)
4070     op0 = force_reg (compute_mode, op0);
4071
4072   last = get_last_insn ();
4073
4074   /* Promote floor rounding to trunc rounding for unsigned operations.  */
4075   if (unsignedp)
4076     {
4077       if (code == FLOOR_DIV_EXPR)
4078         code = TRUNC_DIV_EXPR;
4079       if (code == FLOOR_MOD_EXPR)
4080         code = TRUNC_MOD_EXPR;
4081       if (code == EXACT_DIV_EXPR && op1_is_pow2)
4082         code = TRUNC_DIV_EXPR;
4083     }
4084
4085   if (op1 != const0_rtx)
4086     switch (code)
4087       {
4088       case TRUNC_MOD_EXPR:
4089       case TRUNC_DIV_EXPR:
4090         if (op1_is_constant)
4091           {
4092             if (unsignedp)
4093               {
4094                 unsigned HOST_WIDE_INT mh, ml;
4095                 int pre_shift, post_shift;
4096                 int dummy;
4097                 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4098                                             & GET_MODE_MASK (compute_mode));
4099
4100                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4101                   {
4102                     pre_shift = floor_log2 (d);
4103                     if (rem_flag)
4104                       {
4105                         remainder
4106                           = expand_binop (compute_mode, and_optab, op0,
4107                                           GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4108                                           remainder, 1,
4109                                           OPTAB_LIB_WIDEN);
4110                         if (remainder)
4111                           return gen_lowpart (mode, remainder);
4112                       }
4113                     quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4114                                              pre_shift, tquotient, 1);
4115                   }
4116                 else if (size <= HOST_BITS_PER_WIDE_INT)
4117                   {
4118                     if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4119                       {
4120                         /* Most significant bit of divisor is set; emit an scc
4121                            insn.  */
4122                         quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4123                                                           compute_mode, 1, 1);
4124                       }
4125                     else
4126                       {
4127                         /* Find a suitable multiplier and right shift count
4128                            instead of multiplying with D.  */
4129
4130                         mh = choose_multiplier (d, size, size,
4131                                                 &ml, &post_shift, &dummy);
4132
4133                         /* If the suggested multiplier is more than SIZE bits,
4134                            we can do better for even divisors, using an
4135                            initial right shift.  */
4136                         if (mh != 0 && (d & 1) == 0)
4137                           {
4138                             pre_shift = floor_log2 (d & -d);
4139                             mh = choose_multiplier (d >> pre_shift, size,
4140                                                     size - pre_shift,
4141                                                     &ml, &post_shift, &dummy);
4142                             gcc_assert (!mh);
4143                           }
4144                         else
4145                           pre_shift = 0;
4146
4147                         if (mh != 0)
4148                           {
4149                             rtx t1, t2, t3, t4;
4150
4151                             if (post_shift - 1 >= BITS_PER_WORD)
4152                               goto fail1;
4153
4154                             extra_cost
4155                               = (shift_cost (speed, compute_mode, post_shift - 1)
4156                                  + shift_cost (speed, compute_mode, 1)
4157                                  + 2 * add_cost (speed, compute_mode));
4158                             t1 = expmed_mult_highpart (compute_mode, op0,
4159                                                        GEN_INT (ml),
4160                                                        NULL_RTX, 1,
4161                                                        max_cost - extra_cost);
4162                             if (t1 == 0)
4163                               goto fail1;
4164                             t2 = force_operand (gen_rtx_MINUS (compute_mode,
4165                                                                op0, t1),
4166                                                 NULL_RTX);
4167                             t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4168                                                t2, 1, NULL_RTX, 1);
4169                             t4 = force_operand (gen_rtx_PLUS (compute_mode,
4170                                                               t1, t3),
4171                                                 NULL_RTX);
4172                             quotient = expand_shift
4173                               (RSHIFT_EXPR, compute_mode, t4,
4174                                post_shift - 1, tquotient, 1);
4175                           }
4176                         else
4177                           {
4178                             rtx t1, t2;
4179
4180                             if (pre_shift >= BITS_PER_WORD
4181                                 || post_shift >= BITS_PER_WORD)
4182                               goto fail1;
4183
4184                             t1 = expand_shift
4185                               (RSHIFT_EXPR, compute_mode, op0,
4186                                pre_shift, NULL_RTX, 1);
4187                             extra_cost
4188                               = (shift_cost (speed, compute_mode, pre_shift)
4189                                  + shift_cost (speed, compute_mode, post_shift));
4190                             t2 = expmed_mult_highpart (compute_mode, t1,
4191                                                        GEN_INT (ml),
4192                                                        NULL_RTX, 1,
4193                                                        max_cost - extra_cost);
4194                             if (t2 == 0)
4195                               goto fail1;
4196                             quotient = expand_shift
4197                               (RSHIFT_EXPR, compute_mode, t2,
4198                                post_shift, tquotient, 1);
4199                           }
4200                       }
4201                   }
4202                 else            /* Too wide mode to use tricky code */
4203                   break;
4204
4205                 insn = get_last_insn ();
4206                 if (insn != last)
4207                   set_dst_reg_note (insn, REG_EQUAL,
4208                                     gen_rtx_UDIV (compute_mode, op0, op1),
4209                                     quotient);
4210               }
4211             else                /* TRUNC_DIV, signed */
4212               {
4213                 unsigned HOST_WIDE_INT ml;
4214                 int lgup, post_shift;
4215                 rtx mlr;
4216                 HOST_WIDE_INT d = INTVAL (op1);
4217                 unsigned HOST_WIDE_INT abs_d;
4218
4219                 /* Since d might be INT_MIN, we have to cast to
4220                    unsigned HOST_WIDE_INT before negating to avoid
4221                    undefined signed overflow.  */
4222                 abs_d = (d >= 0
4223                          ? (unsigned HOST_WIDE_INT) d
4224                          : - (unsigned HOST_WIDE_INT) d);
4225
4226                 /* n rem d = n rem -d */
4227                 if (rem_flag && d < 0)
4228                   {
4229                     d = abs_d;
4230                     op1 = gen_int_mode (abs_d, compute_mode);
4231                   }
4232
4233                 if (d == 1)
4234                   quotient = op0;
4235                 else if (d == -1)
4236                   quotient = expand_unop (compute_mode, neg_optab, op0,
4237                                           tquotient, 0);
4238                 else if (HOST_BITS_PER_WIDE_INT >= size
4239                          && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4240                   {
4241                     /* This case is not handled correctly below.  */
4242                     quotient = emit_store_flag (tquotient, EQ, op0, op1,
4243                                                 compute_mode, 1, 1);
4244                     if (quotient == 0)
4245                       goto fail1;
4246                   }
4247                 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4248                          && (rem_flag
4249                              ? smod_pow2_cheap (speed, compute_mode)
4250                              : sdiv_pow2_cheap (speed, compute_mode))
4251                          /* We assume that cheap metric is true if the
4252                             optab has an expander for this mode.  */
4253                          && ((optab_handler ((rem_flag ? smod_optab
4254                                               : sdiv_optab),
4255                                              compute_mode)
4256                               != CODE_FOR_nothing)
4257                              || (optab_handler (sdivmod_optab,
4258                                                 compute_mode)
4259                                  != CODE_FOR_nothing)))
4260                   ;
4261                 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4262                   {
4263                     if (rem_flag)
4264                       {
4265                         remainder = expand_smod_pow2 (compute_mode, op0, d);
4266                         if (remainder)
4267                           return gen_lowpart (mode, remainder);
4268                       }
4269
4270                     if (sdiv_pow2_cheap (speed, compute_mode)
4271                         && ((optab_handler (sdiv_optab, compute_mode)
4272                              != CODE_FOR_nothing)
4273                             || (optab_handler (sdivmod_optab, compute_mode)
4274                                 != CODE_FOR_nothing)))
4275                       quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4276                                                 compute_mode, op0,
4277                                                 gen_int_mode (abs_d,
4278                                                               compute_mode),
4279                                                 NULL_RTX, 0);
4280                     else
4281                       quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4282
4283                     /* We have computed OP0 / abs(OP1).  If OP1 is negative,
4284                        negate the quotient.  */
4285                     if (d < 0)
4286                       {
4287                         insn = get_last_insn ();
4288                         if (insn != last
4289                             && abs_d < ((unsigned HOST_WIDE_INT) 1
4290                                         << (HOST_BITS_PER_WIDE_INT - 1)))
4291                           set_dst_reg_note (insn, REG_EQUAL,
4292                                             gen_rtx_DIV (compute_mode, op0,
4293                                                          gen_int_mode
4294                                                            (abs_d,
4295                                                             compute_mode)),
4296                                             quotient);
4297
4298                         quotient = expand_unop (compute_mode, neg_optab,
4299                                                 quotient, quotient, 0);
4300                       }
4301                   }
4302                 else if (size <= HOST_BITS_PER_WIDE_INT)
4303                   {
4304                     choose_multiplier (abs_d, size, size - 1,
4305                                        &ml, &post_shift, &lgup);
4306                     if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4307                       {
4308                         rtx t1, t2, t3;
4309
4310                         if (post_shift >= BITS_PER_WORD
4311                             || size - 1 >= BITS_PER_WORD)
4312                           goto fail1;
4313
4314                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4315                                       + shift_cost (speed, compute_mode, size - 1)
4316                                       + add_cost (speed, compute_mode));
4317                         t1 = expmed_mult_highpart (compute_mode, op0,
4318                                                    GEN_INT (ml), NULL_RTX, 0,
4319                                                    max_cost - extra_cost);
4320                         if (t1 == 0)
4321                           goto fail1;
4322                         t2 = expand_shift
4323                           (RSHIFT_EXPR, compute_mode, t1,
4324                            post_shift, NULL_RTX, 0);
4325                         t3 = expand_shift
4326                           (RSHIFT_EXPR, compute_mode, op0,
4327                            size - 1, NULL_RTX, 0);
4328                         if (d < 0)
4329                           quotient
4330                             = force_operand (gen_rtx_MINUS (compute_mode,
4331                                                             t3, t2),
4332                                              tquotient);
4333                         else
4334                           quotient
4335                             = force_operand (gen_rtx_MINUS (compute_mode,
4336                                                             t2, t3),
4337                                              tquotient);
4338                       }
4339                     else
4340                       {
4341                         rtx t1, t2, t3, t4;
4342
4343                         if (post_shift >= BITS_PER_WORD
4344                             || size - 1 >= BITS_PER_WORD)
4345                           goto fail1;
4346
4347                         ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4348                         mlr = gen_int_mode (ml, compute_mode);
4349                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4350                                       + shift_cost (speed, compute_mode, size - 1)
4351                                       + 2 * add_cost (speed, compute_mode));
4352                         t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4353                                                    NULL_RTX, 0,
4354                                                    max_cost - extra_cost);
4355                         if (t1 == 0)
4356                           goto fail1;
4357                         t2 = force_operand (gen_rtx_PLUS (compute_mode,
4358                                                           t1, op0),
4359                                             NULL_RTX);
4360                         t3 = expand_shift
4361                           (RSHIFT_EXPR, compute_mode, t2,
4362                            post_shift, NULL_RTX, 0);
4363                         t4 = expand_shift
4364                           (RSHIFT_EXPR, compute_mode, op0,
4365                            size - 1, NULL_RTX, 0);
4366                         if (d < 0)
4367                           quotient
4368                             = force_operand (gen_rtx_MINUS (compute_mode,
4369                                                             t4, t3),
4370                                              tquotient);
4371                         else
4372                           quotient
4373                             = force_operand (gen_rtx_MINUS (compute_mode,
4374                                                             t3, t4),
4375                                              tquotient);
4376                       }
4377                   }
4378                 else            /* Too wide mode to use tricky code */
4379                   break;
4380
4381                 insn = get_last_insn ();
4382                 if (insn != last)
4383                   set_dst_reg_note (insn, REG_EQUAL,
4384                                     gen_rtx_DIV (compute_mode, op0, op1),
4385                                     quotient);
4386               }
4387             break;
4388           }
4389       fail1:
4390         delete_insns_since (last);
4391         break;
4392
4393       case FLOOR_DIV_EXPR:
4394       case FLOOR_MOD_EXPR:
4395       /* We will come here only for signed operations.  */
4396         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4397           {
4398             unsigned HOST_WIDE_INT mh, ml;
4399             int pre_shift, lgup, post_shift;
4400             HOST_WIDE_INT d = INTVAL (op1);
4401
4402             if (d > 0)
4403               {
4404                 /* We could just as easily deal with negative constants here,
4405                    but it does not seem worth the trouble for GCC 2.6.  */
4406                 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4407                   {
4408                     pre_shift = floor_log2 (d);
4409                     if (rem_flag)
4410                       {
4411                         remainder = expand_binop (compute_mode, and_optab, op0,
4412                                                   GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4413                                                   remainder, 0, OPTAB_LIB_WIDEN);
4414                         if (remainder)
4415                           return gen_lowpart (mode, remainder);
4416                       }
4417                     quotient = expand_shift
4418                       (RSHIFT_EXPR, compute_mode, op0,
4419                        pre_shift, tquotient, 0);
4420                   }
4421                 else
4422                   {
4423                     rtx t1, t2, t3, t4;
4424
4425                     mh = choose_multiplier (d, size, size - 1,
4426                                             &ml, &post_shift, &lgup);
4427                     gcc_assert (!mh);
4428
4429                     if (post_shift < BITS_PER_WORD
4430                         && size - 1 < BITS_PER_WORD)
4431                       {
4432                         t1 = expand_shift
4433                           (RSHIFT_EXPR, compute_mode, op0,
4434                            size - 1, NULL_RTX, 0);
4435                         t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4436                                            NULL_RTX, 0, OPTAB_WIDEN);
4437                         extra_cost = (shift_cost (speed, compute_mode, post_shift)
4438                                       + shift_cost (speed, compute_mode, size - 1)
4439                                       + 2 * add_cost (speed, compute_mode));
4440                         t3 = expmed_mult_highpart (compute_mode, t2,
4441                                                    GEN_INT (ml), NULL_RTX, 1,
4442                                                    max_cost - extra_cost);
4443                         if (t3 != 0)
4444                           {
4445                             t4 = expand_shift
4446                               (RSHIFT_EXPR, compute_mode, t3,
4447                                post_shift, NULL_RTX, 1);
4448                             quotient = expand_binop (compute_mode, xor_optab,
4449                                                      t4, t1, tquotient, 0,
4450                                                      OPTAB_WIDEN);
4451                           }
4452                       }
4453                   }
4454               }
4455             else
4456               {
4457                 rtx nsign, t1, t2, t3, t4;
4458                 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4459                                                   op0, constm1_rtx), NULL_RTX);
4460                 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4461                                    0, OPTAB_WIDEN);
4462                 nsign = expand_shift
4463                   (RSHIFT_EXPR, compute_mode, t2,
4464                    size - 1, NULL_RTX, 0);
4465                 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4466                                     NULL_RTX);
4467                 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4468                                     NULL_RTX, 0);
4469                 if (t4)
4470                   {
4471                     rtx t5;
4472                     t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4473                                       NULL_RTX, 0);
4474                     quotient = force_operand (gen_rtx_PLUS (compute_mode,
4475                                                             t4, t5),
4476                                               tquotient);
4477                   }
4478               }
4479           }
4480
4481         if (quotient != 0)
4482           break;
4483         delete_insns_since (last);
4484
4485         /* Try using an instruction that produces both the quotient and
4486            remainder, using truncation.  We can easily compensate the quotient
4487            or remainder to get floor rounding, once we have the remainder.
4488            Notice that we compute also the final remainder value here,
4489            and return the result right away.  */
4490         if (target == 0 || GET_MODE (target) != compute_mode)
4491           target = gen_reg_rtx (compute_mode);
4492
4493         if (rem_flag)
4494           {
4495             remainder
4496               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4497             quotient = gen_reg_rtx (compute_mode);
4498           }
4499         else
4500           {
4501             quotient
4502               = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4503             remainder = gen_reg_rtx (compute_mode);
4504           }
4505
4506         if (expand_twoval_binop (sdivmod_optab, op0, op1,
4507                                  quotient, remainder, 0))
4508           {
4509             /* This could be computed with a branch-less sequence.
4510                Save that for later.  */
4511             rtx tem;
4512             rtx label = gen_label_rtx ();
4513             do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4514             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4515                                 NULL_RTX, 0, OPTAB_WIDEN);
4516             do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4517             expand_dec (quotient, const1_rtx);
4518             expand_inc (remainder, op1);
4519             emit_label (label);
4520             return gen_lowpart (mode, rem_flag ? remainder : quotient);
4521           }
4522
4523         /* No luck with division elimination or divmod.  Have to do it
4524            by conditionally adjusting op0 *and* the result.  */
4525         {
4526           rtx label1, label2, label3, label4, label5;
4527           rtx adjusted_op0;
4528           rtx tem;
4529
4530           quotient = gen_reg_rtx (compute_mode);
4531           adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4532           label1 = gen_label_rtx ();
4533           label2 = gen_label_rtx ();
4534           label3 = gen_label_rtx ();
4535           label4 = gen_label_rtx ();
4536           label5 = gen_label_rtx ();
4537           do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4538           do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4539           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4540                               quotient, 0, OPTAB_LIB_WIDEN);
4541           if (tem != quotient)
4542             emit_move_insn (quotient, tem);
4543           emit_jump_insn (gen_jump (label5));
4544           emit_barrier ();
4545           emit_label (label1);
4546           expand_inc (adjusted_op0, const1_rtx);
4547           emit_jump_insn (gen_jump (label4));
4548           emit_barrier ();
4549           emit_label (label2);
4550           do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4551           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4552                               quotient, 0, OPTAB_LIB_WIDEN);
4553           if (tem != quotient)
4554             emit_move_insn (quotient, tem);
4555           emit_jump_insn (gen_jump (label5));
4556           emit_barrier ();
4557           emit_label (label3);
4558           expand_dec (adjusted_op0, const1_rtx);
4559           emit_label (label4);
4560           tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4561                               quotient, 0, OPTAB_LIB_WIDEN);
4562           if (tem != quotient)
4563             emit_move_insn (quotient, tem);
4564           expand_dec (quotient, const1_rtx);
4565           emit_label (label5);
4566         }
4567         break;
4568
4569       case CEIL_DIV_EXPR:
4570       case CEIL_MOD_EXPR:
4571         if (unsignedp)
4572           {
4573             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4574               {
4575                 rtx t1, t2, t3;
4576                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4577                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4578                                    floor_log2 (d), tquotient, 1);
4579                 t2 = expand_binop (compute_mode, and_optab, op0,
4580                                    GEN_INT (d - 1),
4581                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4582                 t3 = gen_reg_rtx (compute_mode);
4583                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4584                                       compute_mode, 1, 1);
4585                 if (t3 == 0)
4586                   {
4587                     rtx lab;
4588                     lab = gen_label_rtx ();
4589                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4590                     expand_inc (t1, const1_rtx);
4591                     emit_label (lab);
4592                     quotient = t1;
4593                   }
4594                 else
4595                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4596                                                           t1, t3),
4597                                             tquotient);
4598                 break;
4599               }
4600
4601             /* Try using an instruction that produces both the quotient and
4602                remainder, using truncation.  We can easily compensate the
4603                quotient or remainder to get ceiling rounding, once we have the
4604                remainder.  Notice that we compute also the final remainder
4605                value here, and return the result right away.  */
4606             if (target == 0 || GET_MODE (target) != compute_mode)
4607               target = gen_reg_rtx (compute_mode);
4608
4609             if (rem_flag)
4610               {
4611                 remainder = (REG_P (target)
4612                              ? target : gen_reg_rtx (compute_mode));
4613                 quotient = gen_reg_rtx (compute_mode);
4614               }
4615             else
4616               {
4617                 quotient = (REG_P (target)
4618                             ? target : gen_reg_rtx (compute_mode));
4619                 remainder = gen_reg_rtx (compute_mode);
4620               }
4621
4622             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4623                                      remainder, 1))
4624               {
4625                 /* This could be computed with a branch-less sequence.
4626                    Save that for later.  */
4627                 rtx label = gen_label_rtx ();
4628                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4629                                  compute_mode, label);
4630                 expand_inc (quotient, const1_rtx);
4631                 expand_dec (remainder, op1);
4632                 emit_label (label);
4633                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4634               }
4635
4636             /* No luck with division elimination or divmod.  Have to do it
4637                by conditionally adjusting op0 *and* the result.  */
4638             {
4639               rtx label1, label2;
4640               rtx adjusted_op0, tem;
4641
4642               quotient = gen_reg_rtx (compute_mode);
4643               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4644               label1 = gen_label_rtx ();
4645               label2 = gen_label_rtx ();
4646               do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4647                                compute_mode, label1);
4648               emit_move_insn  (quotient, const0_rtx);
4649               emit_jump_insn (gen_jump (label2));
4650               emit_barrier ();
4651               emit_label (label1);
4652               expand_dec (adjusted_op0, const1_rtx);
4653               tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4654                                   quotient, 1, OPTAB_LIB_WIDEN);
4655               if (tem != quotient)
4656                 emit_move_insn (quotient, tem);
4657               expand_inc (quotient, const1_rtx);
4658               emit_label (label2);
4659             }
4660           }
4661         else /* signed */
4662           {
4663             if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4664                 && INTVAL (op1) >= 0)
4665               {
4666                 /* This is extremely similar to the code for the unsigned case
4667                    above.  For 2.7 we should merge these variants, but for
4668                    2.6.1 I don't want to touch the code for unsigned since that
4669                    get used in C.  The signed case will only be used by other
4670                    languages (Ada).  */
4671
4672                 rtx t1, t2, t3;
4673                 unsigned HOST_WIDE_INT d = INTVAL (op1);
4674                 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4675                                    floor_log2 (d), tquotient, 0);
4676                 t2 = expand_binop (compute_mode, and_optab, op0,
4677                                    GEN_INT (d - 1),
4678                                    NULL_RTX, 1, OPTAB_LIB_WIDEN);
4679                 t3 = gen_reg_rtx (compute_mode);
4680                 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4681                                       compute_mode, 1, 1);
4682                 if (t3 == 0)
4683                   {
4684                     rtx lab;
4685                     lab = gen_label_rtx ();
4686                     do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4687                     expand_inc (t1, const1_rtx);
4688                     emit_label (lab);
4689                     quotient = t1;
4690                   }
4691                 else
4692                   quotient = force_operand (gen_rtx_PLUS (compute_mode,
4693                                                           t1, t3),
4694                                             tquotient);
4695                 break;
4696               }
4697
4698             /* Try using an instruction that produces both the quotient and
4699                remainder, using truncation.  We can easily compensate the
4700                quotient or remainder to get ceiling rounding, once we have the
4701                remainder.  Notice that we compute also the final remainder
4702                value here, and return the result right away.  */
4703             if (target == 0 || GET_MODE (target) != compute_mode)
4704               target = gen_reg_rtx (compute_mode);
4705             if (rem_flag)
4706               {
4707                 remainder= (REG_P (target)
4708                             ? target : gen_reg_rtx (compute_mode));
4709                 quotient = gen_reg_rtx (compute_mode);
4710               }
4711             else
4712               {
4713                 quotient = (REG_P (target)
4714                             ? target : gen_reg_rtx (compute_mode));
4715                 remainder = gen_reg_rtx (compute_mode);
4716               }
4717
4718             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4719                                      remainder, 0))
4720               {
4721                 /* This could be computed with a branch-less sequence.
4722                    Save that for later.  */
4723                 rtx tem;
4724                 rtx label = gen_label_rtx ();
4725                 do_cmp_and_jump (remainder, const0_rtx, EQ,
4726                                  compute_mode, label);
4727                 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4728                                     NULL_RTX, 0, OPTAB_WIDEN);
4729                 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4730                 expand_inc (quotient, const1_rtx);
4731                 expand_dec (remainder, op1);
4732                 emit_label (label);
4733                 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4734               }
4735
4736             /* No luck with division elimination or divmod.  Have to do it
4737                by conditionally adjusting op0 *and* the result.  */
4738             {
4739               rtx label1, label2, label3, label4, label5;
4740               rtx adjusted_op0;
4741               rtx tem;
4742
4743               quotient = gen_reg_rtx (compute_mode);
4744               adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4745               label1 = gen_label_rtx ();
4746               label2 = gen_label_rtx ();
4747               label3 = gen_label_rtx ();
4748               label4 = gen_label_rtx ();
4749               label5 = gen_label_rtx ();
4750               do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4751               do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4752                                compute_mode, label1);
4753               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4754                                   quotient, 0, OPTAB_LIB_WIDEN);
4755               if (tem != quotient)
4756                 emit_move_insn (quotient, tem);
4757               emit_jump_insn (gen_jump (label5));
4758               emit_barrier ();
4759               emit_label (label1);
4760               expand_dec (adjusted_op0, const1_rtx);
4761               emit_jump_insn (gen_jump (label4));
4762               emit_barrier ();
4763               emit_label (label2);
4764               do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4765                                compute_mode, label3);
4766               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4767                                   quotient, 0, OPTAB_LIB_WIDEN);
4768               if (tem != quotient)
4769                 emit_move_insn (quotient, tem);
4770               emit_jump_insn (gen_jump (label5));
4771               emit_barrier ();
4772               emit_label (label3);
4773               expand_inc (adjusted_op0, const1_rtx);
4774               emit_label (label4);
4775               tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4776                                   quotient, 0, OPTAB_LIB_WIDEN);
4777               if (tem != quotient)
4778                 emit_move_insn (quotient, tem);
4779               expand_inc (quotient, const1_rtx);
4780               emit_label (label5);
4781             }
4782           }
4783         break;
4784
4785       case EXACT_DIV_EXPR:
4786         if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4787           {
4788             HOST_WIDE_INT d = INTVAL (op1);
4789             unsigned HOST_WIDE_INT ml;
4790             int pre_shift;
4791             rtx t1;
4792
4793             pre_shift = floor_log2 (d & -d);
4794             ml = invert_mod2n (d >> pre_shift, size);
4795             t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4796                                pre_shift, NULL_RTX, unsignedp);
4797             quotient = expand_mult (compute_mode, t1,
4798                                     gen_int_mode (ml, compute_mode),
4799                                     NULL_RTX, 1);
4800
4801             insn = get_last_insn ();
4802             set_dst_reg_note (insn, REG_EQUAL,
4803                               gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4804                                               compute_mode, op0, op1),
4805                               quotient);
4806           }
4807         break;
4808
4809       case ROUND_DIV_EXPR:
4810       case ROUND_MOD_EXPR:
4811         if (unsignedp)
4812           {
4813             rtx tem;
4814             rtx label;
4815             label = gen_label_rtx ();
4816             quotient = gen_reg_rtx (compute_mode);
4817             remainder = gen_reg_rtx (compute_mode);
4818             if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4819               {
4820                 rtx tem;
4821                 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4822                                          quotient, 1, OPTAB_LIB_WIDEN);
4823                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4824                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4825                                           remainder, 1, OPTAB_LIB_WIDEN);
4826               }
4827             tem = plus_constant (compute_mode, op1, -1);
4828             tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4829             do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4830             expand_inc (quotient, const1_rtx);
4831             expand_dec (remainder, op1);
4832             emit_label (label);
4833           }
4834         else
4835           {
4836             rtx abs_rem, abs_op1, tem, mask;
4837             rtx label;
4838             label = gen_label_rtx ();
4839             quotient = gen_reg_rtx (compute_mode);
4840             remainder = gen_reg_rtx (compute_mode);
4841             if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4842               {
4843                 rtx tem;
4844                 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4845                                          quotient, 0, OPTAB_LIB_WIDEN);
4846                 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4847                 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4848                                           remainder, 0, OPTAB_LIB_WIDEN);
4849               }
4850             abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4851             abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4852             tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4853                                 1, NULL_RTX, 1);
4854             do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4855             tem = expand_binop (compute_mode, xor_optab, op0, op1,
4856                                 NULL_RTX, 0, OPTAB_WIDEN);
4857             mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4858                                  size - 1, NULL_RTX, 0);
4859             tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4860                                 NULL_RTX, 0, OPTAB_WIDEN);
4861             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4862                                 NULL_RTX, 0, OPTAB_WIDEN);
4863             expand_inc (quotient, tem);
4864             tem = expand_binop (compute_mode, xor_optab, mask, op1,
4865                                 NULL_RTX, 0, OPTAB_WIDEN);
4866             tem = expand_binop (compute_mode, sub_optab, tem, mask,
4867                                 NULL_RTX, 0, OPTAB_WIDEN);
4868             expand_dec (remainder, tem);
4869             emit_label (label);
4870           }
4871         return gen_lowpart (mode, rem_flag ? remainder : quotient);
4872
4873       default:
4874         gcc_unreachable ();
4875       }
4876
4877   if (quotient == 0)
4878     {
4879       if (target && GET_MODE (target) != compute_mode)
4880         target = 0;
4881
4882       if (rem_flag)
4883         {
4884           /* Try to produce the remainder without producing the quotient.
4885              If we seem to have a divmod pattern that does not require widening,
4886              don't try widening here.  We should really have a WIDEN argument
4887              to expand_twoval_binop, since what we'd really like to do here is
4888              1) try a mod insn in compute_mode
4889              2) try a divmod insn in compute_mode
4890              3) try a div insn in compute_mode and multiply-subtract to get
4891                 remainder
4892              4) try the same things with widening allowed.  */
4893           remainder
4894             = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4895                                  op0, op1, target,
4896                                  unsignedp,
4897                                  ((optab_handler (optab2, compute_mode)
4898                                    != CODE_FOR_nothing)
4899                                   ? OPTAB_DIRECT : OPTAB_WIDEN));
4900           if (remainder == 0)
4901             {
4902               /* No luck there.  Can we do remainder and divide at once
4903                  without a library call?  */
4904               remainder = gen_reg_rtx (compute_mode);
4905               if (! expand_twoval_binop ((unsignedp
4906                                           ? udivmod_optab
4907                                           : sdivmod_optab),
4908                                          op0, op1,
4909                                          NULL_RTX, remainder, unsignedp))
4910                 remainder = 0;
4911             }
4912
4913           if (remainder)
4914             return gen_lowpart (mode, remainder);
4915         }
4916
4917       /* Produce the quotient.  Try a quotient insn, but not a library call.
4918          If we have a divmod in this mode, use it in preference to widening
4919          the div (for this test we assume it will not fail). Note that optab2
4920          is set to the one of the two optabs that the call below will use.  */
4921       quotient
4922         = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4923                              op0, op1, rem_flag ? NULL_RTX : target,
4924                              unsignedp,
4925                              ((optab_handler (optab2, compute_mode)
4926                                != CODE_FOR_nothing)
4927                               ? OPTAB_DIRECT : OPTAB_WIDEN));
4928
4929       if (quotient == 0)
4930         {
4931           /* No luck there.  Try a quotient-and-remainder insn,
4932              keeping the quotient alone.  */
4933           quotient = gen_reg_rtx (compute_mode);
4934           if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4935                                      op0, op1,
4936                                      quotient, NULL_RTX, unsignedp))
4937             {
4938               quotient = 0;
4939               if (! rem_flag)
4940                 /* Still no luck.  If we are not computing the remainder,
4941                    use a library call for the quotient.  */
4942                 quotient = sign_expand_binop (compute_mode,
4943                                               udiv_optab, sdiv_optab,
4944                                               op0, op1, target,
4945                                               unsignedp, OPTAB_LIB_WIDEN);
4946             }
4947         }
4948     }
4949
4950   if (rem_flag)
4951     {
4952       if (target && GET_MODE (target) != compute_mode)
4953         target = 0;
4954
4955       if (quotient == 0)
4956         {
4957           /* No divide instruction either.  Use library for remainder.  */
4958           remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4959                                          op0, op1, target,
4960                                          unsignedp, OPTAB_LIB_WIDEN);
4961           /* No remainder function.  Try a quotient-and-remainder
4962              function, keeping the remainder.  */
4963           if (!remainder)
4964             {
4965               remainder = gen_reg_rtx (compute_mode);
4966               if (!expand_twoval_binop_libfunc
4967                   (unsignedp ? udivmod_optab : sdivmod_optab,
4968                    op0, op1,
4969                    NULL_RTX, remainder,
4970                    unsignedp ? UMOD : MOD))
4971                 remainder = NULL_RTX;
4972             }
4973         }
4974       else
4975         {
4976           /* We divided.  Now finish doing X - Y * (X / Y).  */
4977           remainder = expand_mult (compute_mode, quotient, op1,
4978                                    NULL_RTX, unsignedp);
4979           remainder = expand_binop (compute_mode, sub_optab, op0,
4980                                     remainder, target, unsignedp,
4981                                     OPTAB_LIB_WIDEN);
4982         }
4983     }
4984
4985   return gen_lowpart (mode, rem_flag ? remainder : quotient);
4986 }
4987 \f
4988 /* Return a tree node with data type TYPE, describing the value of X.
4989    Usually this is a VAR_DECL, if there is no obvious better choice.
4990    X may be an expression, however we only support those expressions
4991    generated by loop.c.

        Constants (CONST_INT, CONST_DOUBLE, CONST_VECTOR) become constant
        nodes.  PLUS, MINUS, NEG, MULT, ASHIFT, LSHIFTRT, ASHIFTRT, DIV and
        UDIV are rebuilt as the corresponding tree expression with their
        operands converted by recursive calls; SIGN_EXTEND and ZERO_EXTEND
        become conversions to TYPE; CONST is looked through; a SYMBOL_REF
        that carries a decl becomes the address of that decl.  Any other
        rtx is wrapped in a nameless VAR_DECL of type TYPE whose rtl is X.  */
4992
4993 tree
4994 make_tree (tree type, rtx x)
4995 {
4996   tree t;
4997
4998   switch (GET_CODE (x))
4999     {
5000     case CONST_INT:
5001       {
             /* HI is the high word handed to build_int_cst_wide.  It is -1
                when the value is negative, unless TYPE is unsigned and
                narrower than HOST_BITS_PER_WIDE_INT, in which case it
                stays zero.  */
5002         HOST_WIDE_INT hi = 0;
5003
5004         if (INTVAL (x) < 0
5005             && !(TYPE_UNSIGNED (type)
5006                  && (GET_MODE_BITSIZE (TYPE_MODE (type))
5007                      < HOST_BITS_PER_WIDE_INT)))
5008           hi = -1;
5009
5010         t = build_int_cst_wide (type, INTVAL (x), hi);
5011
5012         return t;
5013       }
5014
5015     case CONST_DOUBLE:
           /* A VOIDmode CONST_DOUBLE is turned into an integer constant
              built from its low and high words; with any other mode it is
              turned into a real constant via build_real.  */
5016       if (GET_MODE (x) == VOIDmode)
5017         t = build_int_cst_wide (type,
5018                                 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
5019       else
5020         {
5021           REAL_VALUE_TYPE d;
5022
5023           REAL_VALUE_FROM_CONST_DOUBLE (d, x);
5024           t = build_real (type, d);
5025         }
5026
5027       return t;
5028
5029     case CONST_VECTOR:
5030       {
5031         int units = CONST_VECTOR_NUNITS (x);
5032         tree itype = TREE_TYPE (type);
5033         tree *elts;
5034         int i;
5035
5036         /* Build a tree with vector elements.  Each element of X is
                converted by a recursive call using ITYPE, i.e.
                TREE_TYPE (type); the loop walks from the last element
                down to the first.  */
5037         elts = XALLOCAVEC (tree, units);
5038         for (i = units - 1; i >= 0; --i)
5039           {
5040             rtx elt = CONST_VECTOR_ELT (x, i);
5041             elts[i] = make_tree (itype, elt);
5042           }
5043
5044         return build_vector (type, elts);
5045       }
5046
         /* For the arithmetic codes below, the operands are converted by
            recursive calls in TYPE and handed to fold_build1 or
            fold_build2 with the corresponding tree code.  */
5047     case PLUS:
5048       return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5049                           make_tree (type, XEXP (x, 1)));
5050
5051     case MINUS:
5052       return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
5053                           make_tree (type, XEXP (x, 1)));
5054
5055     case NEG:
5056       return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
5057
5058     case MULT:
5059       return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
5060                           make_tree (type, XEXP (x, 1)));
5061
5062     case ASHIFT:
5063       return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
5064                           make_tree (type, XEXP (x, 1)));
5065
5066     case LSHIFTRT:
           /* Do the shift in the type returned by unsigned_type_for and
              convert the result back to TYPE.  Note that the shift count
              is still converted in TYPE itself.  */
5067       t = unsigned_type_for (type);
5068       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5069                                          make_tree (t, XEXP (x, 0)),
5070                                          make_tree (type, XEXP (x, 1))));
5071
5072     case ASHIFTRT:
           /* Same as LSHIFTRT, but using the type returned by
              signed_type_for for the shifted operand.  */
5073       t = signed_type_for (type);
5074       return fold_convert (type, build2 (RSHIFT_EXPR, t,
5075                                          make_tree (t, XEXP (x, 0)),
5076                                          make_tree (type, XEXP (x, 1))));
5077
5078     case DIV:
           /* Unless TYPE is a REAL_TYPE, divide in the type returned by
              signed_type_for; both operands are converted in that type and
              the quotient is converted back to TYPE.  */
5079       if (TREE_CODE (type) != REAL_TYPE)
5080         t = signed_type_for (type);
5081       else
5082         t = type;
5083
5084       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5085                                          make_tree (t, XEXP (x, 0)),
5086                                          make_tree (t, XEXP (x, 1))));
5087     case UDIV:
           /* Divide in the type returned by unsigned_type_for, then
              convert the quotient back to TYPE.  */
5088       t = unsigned_type_for (type);
5089       return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5090                                          make_tree (t, XEXP (x, 0)),
5091                                          make_tree (t, XEXP (x, 1))));
5092
5093     case SIGN_EXTEND:
5094     case ZERO_EXTEND:
           /* Ask the language hook for a type matching the mode of the
              inner operand; the second argument is nonzero for
              ZERO_EXTEND (presumably requesting an unsigned type --
              confirm against the type_for_mode hook).  The operand is
              built in that type and then converted to TYPE.  */
5095       t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5096                                           GET_CODE (x) == ZERO_EXTEND);
5097       return fold_convert (type, make_tree (t, XEXP (x, 0)));
5098
5099     case CONST:
           /* Look through the CONST wrapper.  */
5100       return make_tree (type, XEXP (x, 0));
5101
5102     case SYMBOL_REF:
           /* A symbol with an associated decl becomes the address of that
              decl, converted to TYPE.  Without one it is treated like any
              other rtx below.  */
5103       t = SYMBOL_REF_DECL (x);
5104       if (t)
5105         return fold_convert (type, build_fold_addr_expr (t));
5106       /* else fall through.  */
5107
5108     default:
           /* Wrap X in a nameless VAR_DECL of type TYPE, located at
              RTL_LOCATION (x).  */
5109       t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5110
5111       /* If TYPE is a POINTER_TYPE, we might need to convert X from
5112          address mode to pointer mode.  */
5113       if (POINTER_TYPE_P (type))
5114         x = convert_memory_address_addr_space
5115               (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5116
5117       /* Note that we do *not* use SET_DECL_RTL here, because we do not
5118          want set_decl_rtl to go adjusting REG_ATTRS for this temporary.  */
5119       t->decl_with_rtl.rtl = x;
5120
5121       return t;
5122     }
5123 }
5124 \f
5125 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5126    and returning TARGET.
5127
5128    If TARGET is 0, a pseudo-register or constant is returned.

        MODE is the mode handed to simplify_binary_operation and to
        expand_binop for the operation.  */
5129
5130 rtx
5131 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5132 {
5133   rtx tem = 0;
5134
       /* When neither operand carries a mode (both are VOIDmode), try
          simplify_binary_operation first (presumably this folds two
          constants; a zero result means it could not simplify).  */
5135   if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5136     tem = simplify_binary_operation (AND, mode, op0, op1);
       /* Simplification was not attempted or produced nothing: expand the
          AND through and_optab, suggesting TARGET as the destination.  */
5137   if (tem == 0)
5138     tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5139
       /* With no TARGET, return the result wherever it ended up.  With a
          TARGET, copy the result into it if it landed somewhere else.  */
5140   if (target == 0)
5141     target = tem;
5142   else if (tem != target)
5143     emit_move_insn (target, tem);
5144   return target;
5145 }
5146
5147 /* Helper function for emit_store_flag.

        Emit insn ICODE for the comparison X CODE Y and normalize its
        result.  X and Y are first run through prepare_operand together
        with MODE, COMPARE_MODE and UNSIGNEDP.  TARGET, if nonzero, is
        where the caller would like the result; TARGET_MODE is the mode
        wanted for the result, with VOIDmode meaning the mode of the
        insn's operand 0.  NORMALIZEP selects the value wanted for a true
        comparison: 0 or STORE_FLAG_VALUE leave the insn's result as it
        is, otherwise the result is converted to NORMALIZEP, which the
        code below treats as either 1 or -1.

        Return the rtx holding the result, or NULL_RTX (after deleting the
        insns emitted here) if the operands cannot be prepared or the insn
        cannot be expanded.  */
5148 static rtx
5149 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5150              enum machine_mode mode, enum machine_mode compare_mode,
5151              int unsignedp, rtx x, rtx y, int normalizep,
5152              enum machine_mode target_mode)
5153 {
5154   struct expand_operand ops[4];
5155   rtx op0, last, comparison, subtarget;
       /* Mode of the insn's operand 0, i.e. of the raw flag value.  */
5156   enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5157
       /* Remember the last insn so that everything emitted from here on
          can be deleted again on failure.  X and Y are prepared as
          operands 2 and 3 of ICODE.  */
5158   last = get_last_insn ();
5159   x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5160   y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5161   if (!x || !y)
5162     {
5163       delete_insns_since (last);
5164       return NULL_RTX;
5165     }
5166
       /* Default TARGET_MODE to the insn's result mode, and allocate a
          fresh pseudo when the caller supplied no TARGET.  */
5167   if (target_mode == VOIDmode)
5168     target_mode = result_mode;
5169   if (!target)
5170     target = gen_reg_rtx (target_mode);
5171
       /* Operand 1 of the insn is the comparison rtx itself, built in
          RESULT_MODE from the prepared operands.  */
5172   comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5173
       /* When optimizing, no destination is suggested for operand 0;
          otherwise TARGET is suggested.  */
5174   create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5175   create_fixed_operand (&ops[1], comparison);
5176   create_fixed_operand (&ops[2], x);
5177   create_fixed_operand (&ops[3], y);
5178   if (!maybe_expand_insn (icode, 4, ops))
5179     {
5180       delete_insns_since (last);
5181       return NULL_RTX;
5182     }
       /* Whatever the expanded insn actually used as its output.  */
5183   subtarget = ops[0].value;
5184
5185   /* If we are converting to a wider mode, first convert to
5186      TARGET_MODE, then normalize.  This produces better combining
5187      opportunities on machines that have a SIGN_EXTRACT when we are
5188      testing a single bit.  This mostly benefits the 68k.
5189
5190      If STORE_FLAG_VALUE does not have the sign bit set when
5191      interpreted in MODE, we can do this conversion as unsigned, which
5192      is usually more efficient.  */
5193   if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5194     {
5195       convert_move (target, subtarget,
5196                     val_signbit_known_clear_p (result_mode,
5197                                                STORE_FLAG_VALUE));
5198       op0 = target;
           /* From here on the normalization works in TARGET_MODE.  */
5199       result_mode = target_mode;
5200     }
5201   else
5202     op0 = subtarget;
5203
5204   /* If we want to keep subexpressions around, don't reuse our last
5205      target.  */
5206   if (optimize)
5207     subtarget = 0;
5208
5209   /* Now normalize to the proper value in MODE.  Sometimes we don't
5210      have to do anything.  */
5211   if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5212     ;
5213   /* STORE_FLAG_VALUE might be the most negative number, so write
5214      the comparison this way to avoid a compiler-time warning.  */
5215   else if (- normalizep == STORE_FLAG_VALUE)
5216     op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5217
5218   /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5219      it hard to use a value of just the sign bit due to ANSI integer
5220      constant typing rules.  */
5221   else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
         /* Shift the sign bit down to bit 0.  The last argument is nonzero
            exactly when 1 is wanted -- presumably selecting a logical
            rather than an arithmetic shift (confirm against
            expand_shift).  */
5222     op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5223                         GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5224                         normalizep == 1);
5225   else
5226     {
           /* The remaining case requires the low bit of STORE_FLAG_VALUE
              to be set: isolate that bit, then negate it when -1 is
              wanted.  */
5227       gcc_assert (STORE_FLAG_VALUE & 1);
5228
5229       op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5230       if (normalizep == -1)
5231         op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5232     }
5233
5234   /* If we were converting to a smaller mode, do the conversion now.  */
5235   if (target_mode != result_mode)
5236     {
5237       convert_move (target, op0, 0);
5238       return target;
5239     }
5240   else
         /* OP0 already has TARGET_MODE; return it as is.  */
5241     return op0;
5242 }
5243
5244
5245 /* A subroutine of emit_store_flag only including "tricks" that do not
5246    need a recursive call.  These are kept separate to avoid infinite
5247    loops.

        Emit code that sets a value according to comparison CODE applied to
        OP0 and OP1, whose mode is MODE (taken from OP0 when MODE is
        VOIDmode).  TARGET, if nonzero, is a suggested place for the result
        and TARGET_MODE is the mode wanted for it; TARGET_MODE may be
        VOIDmode.  If UNSIGNEDP is nonzero, CODE is first mapped through
        unsigned_condition.  NORMALIZEP has the meaning documented for
        emit_store_flag.

        Three strategies are tried in order: reducing a double-word integer
        comparison against 0 or -1 to a single word (this one does call
        emit_store_flag, but only on a word_mode operand), computing A < 0
        or A >= 0 by shifting the sign bit, and finally the cstore pattern
        for MODE or for the first wider mode that has one.

        Return the rtx holding the result, or 0 if none of these applied.  */
5248
5249 static rtx
5250 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5251                    enum machine_mode mode, int unsignedp, int normalizep,
5252                    enum machine_mode target_mode)
5253 {
5254   rtx subtarget;
5255   enum insn_code icode;
5256   enum machine_mode compare_mode;
5257   enum mode_class mclass;
5258   enum rtx_code scode;
5259   rtx tem;
5260
5261   if (unsignedp)
5262     code = unsigned_condition (code);
5263   /* SCODE is derived further down, once CODE has reached its final form.  */
5264
5265   /* If one operand is constant, make it the second one.  Only do this
5266      if the other operand is not constant as well.  */
5267
5268   if (swap_commutative_operands_p (op0, op1))
5269     {
5270       tem = op0;
5271       op0 = op1;
5272       op1 = tem;
5273       code = swap_condition (code);
5274     }
5275
5276   if (mode == VOIDmode)
5277     mode = GET_MODE (op0);
5278
5279   /* For some comparisons with 1 and -1, we can convert this to
5280      comparisons with zero.  This will often produce more opportunities for
5281      store-flag insns.  */
5282
5283   switch (code)
5284     {
5285     case LT:
5286       if (op1 == const1_rtx)
5287         op1 = const0_rtx, code = LE;
5288       break;
5289     case LE:
5290       if (op1 == constm1_rtx)
5291         op1 = const0_rtx, code = LT;
5292       break;
5293     case GE:
5294       if (op1 == const1_rtx)
5295         op1 = const0_rtx, code = GT;
5296       break;
5297     case GT:
5298       if (op1 == constm1_rtx)
5299         op1 = const0_rtx, code = GE;
5300       break;
5301     case GEU:
5302       if (op1 == const1_rtx)
5303         op1 = const0_rtx, code = NE;
5304       break;
5305     case LTU:
5306       if (op1 == const1_rtx)
5307         op1 = const0_rtx, code = EQ;
5308       break;
5309     default:
5310       break;
5311     }
5312
5313   /* If we are comparing a double-word integer with zero or -1, we can
5314      convert the comparison into one involving a single word.  */
5315   if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5316       && GET_MODE_CLASS (mode) == MODE_INT
5317       && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5318     {
5319       if ((code == EQ || code == NE)
5320           && (op1 == const0_rtx || op1 == constm1_rtx))
5321         {
5322           rtx op00, op01;
5323
5324           /* Do a logical OR or AND of the two words and compare the
5325              result.  */
5326           op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5327           op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5328           tem = expand_binop (word_mode,
5329                               op1 == const0_rtx ? ior_optab : and_optab,
5330                               op00, op01, NULL_RTX, unsignedp,
5331                               OPTAB_DIRECT);
5332
5333           if (tem != 0)
5334             tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5335                                    unsignedp, normalizep);
5336         }
5337       else if ((code == LT || code == GE) && op1 == const0_rtx)
5338         {
5339           rtx op0h;
5340
5341           /* If testing the sign bit, can just test on high word.  */
5342           op0h = simplify_gen_subreg (word_mode, op0, mode,
5343                                       subreg_highpart_offset (word_mode,
5344                                                               mode));
5345           tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5346                                  unsignedp, normalizep);
5347         }
5348       else
5349         tem = NULL_RTX;
5350
5351       if (tem)
5352         {
5353           if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5354             return tem;
5355           if (!target)
5356             target = gen_reg_rtx (target_mode);
5357
               /* The word_mode result still has to be brought into
                  TARGET_MODE.  The last argument is nonzero unless the
                  value stored for "true" is known to have its sign bit
                  set in word_mode.  */
5358           convert_move (target, tem,
5359                         !val_signbit_known_set_p (word_mode,
5360                                                   (normalizep ? normalizep
5361                                                    : STORE_FLAG_VALUE)));
5362           return target;
5363         }
5364     }
5365
5366   /* If this is A < 0 or A >= 0, we can do this by taking the ones
5367      complement of A (for GE) and shifting the sign bit to the low bit.  */
5368   if (op1 == const0_rtx && (code == LT || code == GE)
5369       && GET_MODE_CLASS (mode) == MODE_INT
5370       && (normalizep || STORE_FLAG_VALUE == 1
5371           || val_signbit_p (mode, STORE_FLAG_VALUE)))
5372     {
5373       subtarget = target;
5374
5375       if (!target)
5376         target_mode = mode;
5377
5378       /* If the result is to be wider than OP0, it is best to convert it
5379          first.  If it is to be narrower, it is *incorrect* to convert it
5380          first.  */
5381       else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5382         {
5383           op0 = convert_modes (target_mode, mode, op0, 0);
5384           mode = target_mode;
5385         }
5386
5387       if (target_mode != mode)
5388         subtarget = 0;
5389
5390       if (code == GE)
5391         op0 = expand_unop (mode, one_cmpl_optab, op0,
5392                            ((STORE_FLAG_VALUE == 1 || normalizep)
5393                             ? 0 : subtarget), 0);
5394
5395       if (STORE_FLAG_VALUE == 1 || normalizep)
5396         /* If we are supposed to produce a 0/1 value, we want to do
5397            a logical shift from the sign bit to the low-order bit; for
5398            a -1/0 value, we do an arithmetic shift.  */
5399         op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5400                             GET_MODE_BITSIZE (mode) - 1,
5401                             subtarget, normalizep != -1);
5402
5403       if (mode != target_mode)
5404         op0 = convert_modes (target_mode, mode, op0, 0);
5405
5406       return op0;
5407     }
5408
       /* SCODE is CODE with its operands exchanged, for the floating-point
          retry below that passes OP1 and OP0 in the opposite order.  It must
          be derived from CODE as it stands now: the operand swap near the
          top of this function already replaced CODE by its swapped form, so
          a value computed before that swap would equal CODE and the retry
          would test the wrong condition.  */
       scode = swap_condition (code);
5409   mclass = GET_MODE_CLASS (mode);
5410   for (compare_mode = mode; compare_mode != VOIDmode;
5411        compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5412     {
5413      enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5414      icode = optab_handler (cstore_optab, optab_mode);
5415      if (icode != CODE_FOR_nothing)
5416         {
5417           do_pending_stack_adjust ();
5418           tem = emit_cstore (target, icode, code, mode, compare_mode,
5419                              unsignedp, op0, op1, normalizep, target_mode);
5420           if (tem)
5421             return tem;
5422
               /* For floating point, also try the same test written with
                  the operands exchanged.  */
5423           if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5424             {
5425               tem = emit_cstore (target, icode, scode, mode, compare_mode,
5426                                  unsignedp, op1, op0, normalizep, target_mode);
5427               if (tem)
5428                 return tem;
5429             }
               /* Only the first mode that has a cstore pattern is tried.  */
5430           break;
5431         }
5432     }
5433
5434   return 0;
5435 }
5436
5437 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5438    and storing in TARGET.  Normally return TARGET.
5439    Return 0 if that cannot be done.
5440
5441    MODE is the mode to use for OP0 and OP1 should they be CONST_INTs.  If
5442    it is VOIDmode, they cannot both be CONST_INT.
5443
5444    UNSIGNEDP is for the case where we have to widen the operands
5445    to perform the operation.  It says to use zero-extension.
5446
5447    NORMALIZEP is 1 if we should convert the result to be either zero
5448    or one.  Normalize is -1 if we should convert the result to be
5449    either zero or -1.  If NORMALIZEP is zero, the result will be left
5450    "raw" out of the scc insn.

        TARGET may be zero; the result is then returned in whatever rtx
        the expansion produced.  emit_store_flag_1 is tried first.  The
        fallbacks that follow it are skipped when BRANCH_COST is zero and,
        on failure, discard the insns they emitted with
        delete_insns_since.  */
5451
5452 rtx
5453 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5454                  enum machine_mode mode, int unsignedp, int normalizep)
5455 {
5456   enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5457   enum rtx_code rcode;
5458   rtx subtarget;
5459   rtx tem, last, trueval;
5460
5461   tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5462                            target_mode);
5463   if (tem)
5464     return tem;
5465
5466   /* If we reached here, we can't do this with a scc insn, however there
5467      are some comparisons that can be done in other ways.  Don't do any
5468      of these cases if branches are very cheap.  */
5469   if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5470     return 0;
5471
5472   /* See what we need to return.  We can only return a 1, -1, or the
5473      sign bit.  */
5474
5475   if (normalizep == 0)
5476     {
5477       if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5478         normalizep = STORE_FLAG_VALUE;
5479
5480       else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5481         ;
5482       else
5483         return 0;
5484     }
5485
       /* Remember the current end of the insn stream so that a failed
          attempt below can be discarded with delete_insns_since.  */
5486   last = get_last_insn ();
5487
5488   /* If optimizing, use different pseudo registers for each insn, instead
5489      of reusing the same pseudo.  This leads to better CSE, but slows
5490      down the compiler, since there are more pseudos.  */
5491   subtarget = (!optimize
5492                && (target_mode == mode)) ? target : NULL_RTX;
5493   trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5494
5495   /* For floating-point comparisons, try the reverse comparison or try
5496      changing the "orderedness" of the comparison.  */
5497   if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5498     {
5499       enum rtx_code first_code;
5500       bool and_them;
5501
5502       rcode = reverse_condition_maybe_unordered (code);
5503       if (can_compare_p (rcode, mode, ccp_store_flag)
5504           && (code == ORDERED || code == UNORDERED
5505               || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5506               || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5507         {
               /* WANT_ADD is set when STORE_FLAG_VALUE and NORMALIZEP are
                  1 and -1 in either order.  The reversed flag is then
                  computed as 0/STORE_FLAG_VALUE and NORMALIZEP is added,
                  which yields NORMALIZEP/0.  Otherwise the reversed flag is
                  computed as 0/TRUEVAL and XORed with TRUEVAL.  Either form
                  is used only when rtx_cost returns zero for the constant
                  in that operation.  */
5508           int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5509                           || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5510
5511           /* For the reverse comparison, use either an addition or a XOR.  */
5512           if (want_add
5513               && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5514                            optimize_insn_for_speed_p ()) == 0)
5515             {
5516               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5517                                        STORE_FLAG_VALUE, target_mode);
5518               if (tem)
5519                 return expand_binop (target_mode, add_optab, tem,
5520                                      GEN_INT (normalizep),
5521                                      target, 0, OPTAB_WIDEN);
5522             }
5523           else if (!want_add
5524                    && rtx_cost (trueval, XOR, 1,
5525                                 optimize_insn_for_speed_p ()) == 0)
5526             {
5527               tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5528                                        normalizep, target_mode);
5529               if (tem)
5530                 return expand_binop (target_mode, xor_optab, tem, trueval,
5531                                      target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5532             }
5533         }
5534
5535       delete_insns_since (last);
5536
5537       /* Cannot split ORDERED and UNORDERED, only try the above trick.   */
5538       if (code == ORDERED || code == UNORDERED)
5539         return 0;
5540
           /* Split CODE in two: FIRST_CODE receives the first comparison
              and CODE is overwritten with the second.  AND_THEM is
              presumably true when the two results must be ANDed rather than
              ORed; the assertion below pairs it with ORDERED.  */
5541       and_them = split_comparison (code, mode, &first_code, &code);
5542
5543       /* If there are no NaNs, the first comparison should always fall through.
5544          Effectively change the comparison to the other one.  */
5545       if (!HONOR_NANS (mode))
5546         {
5547           gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5548           return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5549                                     target_mode);
5550         }
5551
5552 #ifdef HAVE_conditional_move
5553       /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5554          conditional move.  */
5555       tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5556                                normalizep, target_mode);
5557       if (tem == 0)
5558         return 0;
5559
5560       if (and_them)
5561         tem = emit_conditional_move (target, code, op0, op1, mode,
5562                                      tem, const0_rtx, GET_MODE (tem), 0);
5563       else
5564         tem = emit_conditional_move (target, code, op0, op1, mode,
5565                                      trueval, tem, GET_MODE (tem), 0);
5566
5567       if (tem == 0)
5568         delete_insns_since (last);
5569       return tem;
5570 #else
5571       return 0;
5572 #endif
5573     }
5574
5575   /* The remaining tricks only apply to integer comparisons.  */
5576
5577   if (GET_MODE_CLASS (mode) != MODE_INT)
5578     return 0;
5579
5580   /* If this is an equality comparison of integers, we can try to exclusive-or
5581      (or subtract) the two operands and use a recursive call to try the
5582      comparison with zero.  Don't do any of these cases if branches are
5583      very cheap.  */
5584
5585   if ((code == EQ || code == NE) && op1 != const0_rtx)
5586     {
5587       tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5588                           OPTAB_WIDEN);
5589
5590       if (tem == 0)
5591         tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5592                             OPTAB_WIDEN);
5593       if (tem != 0)
5594         tem = emit_store_flag (target, code, tem, const0_rtx,
5595                                mode, unsignedp, normalizep);
5596       if (tem != 0)
5597         return tem;
5598
5599       delete_insns_since (last);
5600     }
5601
5602   /* For integer comparisons, try the reverse comparison.  However, for
5603      small X and if we'd have anyway to extend, implementing "X != 0"
5604      as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0".  */
5605   rcode = reverse_condition (code);
5606   if (can_compare_p (rcode, mode, ccp_store_flag)
5607       && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5608             && code == NE
5609             && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5610             && op1 == const0_rtx))
5611     {
5612       int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5613                       || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5614
5615       /* Again, for the reverse comparison, use either an addition or a XOR.  */
5616       if (want_add
5617           && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5618                        optimize_insn_for_speed_p ()) == 0)
5619         {
5620           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5621                                    STORE_FLAG_VALUE, target_mode);
5622           if (tem != 0)
5623             tem = expand_binop (target_mode, add_optab, tem,
5624                                 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5625         }
5626       else if (!want_add
5627                && rtx_cost (trueval, XOR, 1,
5628                             optimize_insn_for_speed_p ()) == 0)
5629         {
5630           tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5631                                    normalizep, target_mode);
5632           if (tem != 0)
5633             tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5634                                 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5635         }
5636
5637       if (tem != 0)
5638         return tem;
5639       delete_insns_since (last);
5640     }
5641
5642   /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5643      the constant zero.  Reject all other comparisons at this point.  Only
5644      do LE and GT if branches are expensive since they are expensive on
5645      2-operand machines.  */
5646
5647   if (op1 != const0_rtx
5648       || (code != EQ && code != NE
5649           && (BRANCH_COST (optimize_insn_for_speed_p (),
5650                            false) <= 1 || (code != LE && code != GT))))
5651     return 0;
5652
5653   /* Try to put the result of the comparison in the sign bit.  Assume we can't
5654      do the necessary operation below.  */
5655
5656   tem = 0;
5657
5658   /* To see if A <= 0, compute (A | (A - 1)).  A <= 0 iff that result has
5659      the sign bit set.  */
5660
5661   if (code == LE)
5662     {
5663       /* This is destructive, so SUBTARGET can't be OP0.  */
5664       if (rtx_equal_p (subtarget, op0))
5665         subtarget = 0;
5666
5667       tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5668                           OPTAB_WIDEN);
5669       if (tem)
5670         tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5671                             OPTAB_WIDEN);
5672     }
5673
5674   /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5675      number of bits in the mode of OP0, minus one.  */
5676
5677   if (code == GT)
5678     {
5679       if (rtx_equal_p (subtarget, op0))
5680         subtarget = 0;
5681
5682       tem = expand_shift (RSHIFT_EXPR, mode, op0,
5683                           GET_MODE_BITSIZE (mode) - 1,
5684                           subtarget, 0);
5685       tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5686                           OPTAB_WIDEN);
5687     }
5688
5689   if (code == EQ || code == NE)
5690     {
5691       /* For EQ or NE, one way to do the comparison is to apply an operation
5692          that converts the operand into a positive number if it is nonzero
5693          or zero if it was originally zero.  Then, for EQ, we subtract 1 and
5694          for NE we negate.  This puts the result in the sign bit.  Then we
5695          normalize with a shift, if needed.
5696
5697          Two operations that can do the above actions are ABS and FFS, so try
5698          them.  If that doesn't work, and MODE is smaller than a full word,
5699          we can use zero-extension to the wider mode (an unsigned conversion)
5700          as the operation.  */
5701
5702       /* Note that ABS doesn't yield a positive number for INT_MIN, but
5703          that is compensated by the subsequent overflow when subtracting
5704          one / negating.  */
5705
5706       if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5707         tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5708       else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5709         tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5710       else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5711         {
5712           tem = convert_modes (word_mode, mode, op0, 1);
               /* From here on the computation is carried out in
                  word_mode.  */
5713           mode = word_mode;
5714         }
5715
5716       if (tem != 0)
5717         {
5718           if (code == EQ)
5719             tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5720                                 0, OPTAB_WIDEN);
5721           else
5722             tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5723         }
5724
5725       /* If we couldn't do it that way, for NE we can "or" the two's complement
5726          of the value with itself.  For EQ, we take the one's complement of
5727          that "or", which is an extra insn, so we only handle EQ if branches
5728          are expensive.  */
5729
5730       if (tem == 0
5731           && (code == NE
5732               || BRANCH_COST (optimize_insn_for_speed_p (),
5733                               false) > 1))
5734         {
5735           if (rtx_equal_p (subtarget, op0))
5736             subtarget = 0;
5737
5738           tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5739           tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5740                               OPTAB_WIDEN);
5741
5742           if (tem && code == EQ)
5743             tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5744         }
5745     }
5746
       /* TEM, if nonzero, now has the answer in its sign bit.  When a
          normalized value was requested, shift that bit down to the low
          bit; the last argument is nonzero for a 0/1 result.  */
5747   if (tem && normalizep)
5748     tem = expand_shift (RSHIFT_EXPR, mode, tem,
5749                         GET_MODE_BITSIZE (mode) - 1,
5750                         subtarget, normalizep == 1);
5751
5752   if (tem)
5753     {
5754       if (!target)
5755         ;
5756       else if (GET_MODE (tem) != target_mode)
5757         {
5758           convert_move (target, tem, 0);
5759           tem = target;
5760         }
           /* No SUBTARGET was in use, so the value was not steered into
              TARGET; copy it there.  NOTE(review): when SUBTARGET is
              nonzero TEM is presumably TARGET already -- confirm.  */
5761       else if (!subtarget)
5762         {
5763           emit_move_insn (target, tem);
5764           tem = target;
5765         }
5766     }
5767   else
5768     delete_insns_since (last);
5769
5770   return tem;
5771 }
5772
5773 /* Like emit_store_flag, but always succeeds.

        If emit_store_flag cannot produce the value, fall back to an
        explicit store / compare-and-jump / store sequence.  On that path
        the result is NORMALIZEP for a true comparison (1 when NORMALIZEP is
        zero) and zero for a false one.  When TARGET is zero, a word_mode
        pseudo is allocated for the result.  The rtx returned may be a new
        pseudo rather than TARGET.  */
5774
5775 rtx
5776 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5777                        enum machine_mode mode, int unsignedp, int normalizep)
5778 {
5779   rtx tem, label;
5780   rtx trueval, falseval;
5781
5782   /* First see if emit_store_flag can do the job.  */
5783   tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5784   if (tem != 0)
5785     return tem;
5786
5787   if (!target)
5788     target = gen_reg_rtx (word_mode);
5789
5790   /* If this failed, we have to do this with set/compare/jump/set code.
5791      For foo != 0, if foo is in OP0, just replace it with 1 if nonzero.  */
5792   trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5793   if (code == NE
5794       && GET_MODE_CLASS (mode) == MODE_INT
5795       && REG_P (target)
5796       && op0 == target
5797       && op1 == const0_rtx)
5798     {
           /* TARGET already holds zero in the "false" case, so jump over
              the store when it equals zero and overwrite it with TRUEVAL
              otherwise.  */
5799       label = gen_label_rtx ();
5800       do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5801                                mode, NULL_RTX, NULL_RTX, label, -1);
5802       emit_move_insn (target, trueval);
5803       emit_label (label);
5804       return target;
5805     }
5806
       /* TARGET is written before the operands are compared, so it must be
          a register that neither operand mentions; otherwise use a fresh
          pseudo of the same mode.  */
5807   if (!REG_P (target)
5808       || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5809     target = gen_reg_rtx (GET_MODE (target));
5810
5811   /* Jump in the right direction if the target cannot implement CODE
5812      but can jump on its reverse condition.  */
5813   falseval = const0_rtx;
5814   if (! can_compare_p (code, mode, ccp_jump)
5815       && (! FLOAT_MODE_P (mode)
5816           || code == ORDERED || code == UNORDERED
5817           || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5818           || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5819     {
5820       enum rtx_code rcode;
5821       if (FLOAT_MODE_P (mode))
5822         rcode = reverse_condition_maybe_unordered (code);
5823       else
5824         rcode = reverse_condition (code);
5825
5826       /* Canonicalize to UNORDERED for the libcall.  */
5827       if (can_compare_p (rcode, mode, ccp_jump)
5828           || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5829         {
               /* Test the reversed condition instead and exchange the two
                  stored values, which leaves the final result unchanged.  */
5830           falseval = trueval;
5831           trueval = const0_rtx;
5832           code = rcode;
5833         }
5834     }
5835
       /* Store the value for a true comparison, jump to LABEL if CODE
          holds, and otherwise fall through to overwrite TARGET with the
          value for a false one.  */
5836   emit_move_insn (target, trueval);
5837   label = gen_label_rtx ();
5838   do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5839                            NULL_RTX, label, -1);
5840
5841   emit_move_insn (target, falseval);
5842   emit_label (label);
5843
5844   return target;
5845 }
5846 \f
5847 /* Compare ARG1 with ARG2 in mode MODE (possibly a multi-word mode) and
5848    jump to LABEL when ARG1 OP ARG2 holds.  The comparison is treated as
5849    unsigned exactly when OP is one of GEU, GTU, LEU or LTU.  */
5850
5851 static void
5852 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5853                  rtx label)
5854 {
5855   const int is_unsigned = op == GEU || op == GTU || op == LEU || op == LTU;
5856   do_compare_rtx_and_jump (arg1, arg2, op, is_unsigned, mode, NULL_RTX,
5857                            NULL_RTX, label, -1);
5858 }