[gcc.git] / gcc / expmed.c
1 /* Medium-level subroutines: convert bit-field store and extract
2 and shifts, multiplies and divides to rtl instructions.
3 Copyright (C) 1987, 1988, 1989, 1992, 1993, 1994, 1995, 1996, 1997, 1998,
4 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010,
5 2011, 2012
6 Free Software Foundation, Inc.
7
8 This file is part of GCC.
9
10 GCC is free software; you can redistribute it and/or modify it under
11 the terms of the GNU General Public License as published by the Free
12 Software Foundation; either version 3, or (at your option) any later
13 version.
14
15 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
16 WARRANTY; without even the implied warranty of MERCHANTABILITY or
17 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
18 for more details.
19
20 You should have received a copy of the GNU General Public License
21 along with GCC; see the file COPYING3. If not see
22 <http://www.gnu.org/licenses/>. */
23
24
25 #include "config.h"
26 #include "system.h"
27 #include "coretypes.h"
28 #include "tm.h"
29 #include "diagnostic-core.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "tm_p.h"
33 #include "flags.h"
34 #include "insn-config.h"
35 #include "expr.h"
36 #include "optabs.h"
37 #include "recog.h"
38 #include "langhooks.h"
39 #include "df.h"
40 #include "target.h"
41 #include "expmed.h"
42
43 struct target_expmed default_target_expmed;
44 #if SWITCHABLE_TARGET
45 struct target_expmed *this_target_expmed = &default_target_expmed;
46 #endif
47
48 static void store_fixed_bit_field (rtx, unsigned HOST_WIDE_INT,
49 unsigned HOST_WIDE_INT,
50 unsigned HOST_WIDE_INT,
51 unsigned HOST_WIDE_INT,
52 rtx);
53 static void store_split_bit_field (rtx, unsigned HOST_WIDE_INT,
54 unsigned HOST_WIDE_INT,
55 unsigned HOST_WIDE_INT,
56 unsigned HOST_WIDE_INT,
57 rtx);
58 static rtx extract_fixed_bit_field (enum machine_mode, rtx,
59 unsigned HOST_WIDE_INT,
60 unsigned HOST_WIDE_INT, rtx, int, bool);
61 static rtx mask_rtx (enum machine_mode, int, int, int);
62 static rtx lshift_value (enum machine_mode, rtx, int, int);
63 static rtx extract_split_bit_field (rtx, unsigned HOST_WIDE_INT,
64 unsigned HOST_WIDE_INT, int);
65 static void do_cmp_and_jump (rtx, rtx, enum rtx_code, enum machine_mode, rtx);
66 static rtx expand_smod_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
67 static rtx expand_sdiv_pow2 (enum machine_mode, rtx, HOST_WIDE_INT);
68
69 /* Test whether a value is zero or a power of two.  */
70 #define EXACT_POWER_OF_2_OR_ZERO_P(x) (((x) & ((x) - 1)) == 0)
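/* Illustration of the test above: 8 & 7 == 0 and 0 & (0 - 1) == 0, so 8 and 0
   satisfy it, while 12 & 11 == 8, so 12 does not.  */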
71
72 /* Reduce conditional compilation elsewhere. */
73 #ifndef HAVE_insv
74 #define HAVE_insv 0
75 #define CODE_FOR_insv CODE_FOR_nothing
76 #define gen_insv(a,b,c,d) NULL_RTX
77 #endif
78 #ifndef HAVE_extv
79 #define HAVE_extv 0
80 #define CODE_FOR_extv CODE_FOR_nothing
81 #define gen_extv(a,b,c,d) NULL_RTX
82 #endif
83 #ifndef HAVE_extzv
84 #define HAVE_extzv 0
85 #define CODE_FOR_extzv CODE_FOR_nothing
86 #define gen_extzv(a,b,c,d) NULL_RTX
87 #endif
88
89 struct init_expmed_rtl
90 {
91 struct rtx_def reg; rtunion reg_fld[2];
92 struct rtx_def plus; rtunion plus_fld1;
93 struct rtx_def neg;
94 struct rtx_def mult; rtunion mult_fld1;
95 struct rtx_def sdiv; rtunion sdiv_fld1;
96 struct rtx_def udiv; rtunion udiv_fld1;
97 struct rtx_def sdiv_32; rtunion sdiv_32_fld1;
98 struct rtx_def smod_32; rtunion smod_32_fld1;
99 struct rtx_def wide_mult; rtunion wide_mult_fld1;
100 struct rtx_def wide_lshr; rtunion wide_lshr_fld1;
101 struct rtx_def wide_trunc;
102 struct rtx_def shift; rtunion shift_fld1;
103 struct rtx_def shift_mult; rtunion shift_mult_fld1;
104 struct rtx_def shift_add; rtunion shift_add_fld1;
105 struct rtx_def shift_sub0; rtunion shift_sub0_fld1;
106 struct rtx_def shift_sub1; rtunion shift_sub1_fld1;
107 struct rtx_def zext;
108 struct rtx_def trunc;
109
110 rtx pow2[MAX_BITS_PER_WORD];
111 rtx cint[MAX_BITS_PER_WORD];
112 };
113
114 static void
115 init_expmed_one_conv (struct init_expmed_rtl *all, enum machine_mode to_mode,
116 enum machine_mode from_mode, bool speed)
117 {
118 int to_size, from_size;
119 rtx which;
120
121 /* We're given no information about the true size of a partial integer,
122 only the size of the "full" integer it requires for storage. For
123 comparison purposes here, reduce the bit size by one in that case. */
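/* For example, a partial-integer mode stored in 32 bits is treated as 31 bits
   here, so converting a true 32-bit integer mode to it is costed as a
   truncation, while converting it to the 32-bit mode is costed as an
   extension.  */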
124 to_size = (GET_MODE_BITSIZE (to_mode)
125 - (GET_MODE_CLASS (to_mode) == MODE_PARTIAL_INT));
126 from_size = (GET_MODE_BITSIZE (from_mode)
127 - (GET_MODE_CLASS (from_mode) == MODE_PARTIAL_INT));
128
129 /* Assume the cost of zero-extend and sign-extend is the same.  */
130 which = (to_size < from_size ? &all->trunc : &all->zext);
131
132 PUT_MODE (&all->reg, from_mode);
133 set_convert_cost (to_mode, from_mode, speed, set_src_cost (which, speed));
134 }
135
136 static void
137 init_expmed_one_mode (struct init_expmed_rtl *all,
138 enum machine_mode mode, int speed)
139 {
140 int m, n, mode_bitsize;
141 enum machine_mode mode_from;
142
143 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
144
145 PUT_MODE (&all->reg, mode);
146 PUT_MODE (&all->plus, mode);
147 PUT_MODE (&all->neg, mode);
148 PUT_MODE (&all->mult, mode);
149 PUT_MODE (&all->sdiv, mode);
150 PUT_MODE (&all->udiv, mode);
151 PUT_MODE (&all->sdiv_32, mode);
152 PUT_MODE (&all->smod_32, mode);
153 PUT_MODE (&all->wide_trunc, mode);
154 PUT_MODE (&all->shift, mode);
155 PUT_MODE (&all->shift_mult, mode);
156 PUT_MODE (&all->shift_add, mode);
157 PUT_MODE (&all->shift_sub0, mode);
158 PUT_MODE (&all->shift_sub1, mode);
159 PUT_MODE (&all->zext, mode);
160 PUT_MODE (&all->trunc, mode);
161
162 set_add_cost (speed, mode, set_src_cost (&all->plus, speed));
163 set_neg_cost (speed, mode, set_src_cost (&all->neg, speed));
164 set_mul_cost (speed, mode, set_src_cost (&all->mult, speed));
165 set_sdiv_cost (speed, mode, set_src_cost (&all->sdiv, speed));
166 set_udiv_cost (speed, mode, set_src_cost (&all->udiv, speed));
167
168 set_sdiv_pow2_cheap (speed, mode, (set_src_cost (&all->sdiv_32, speed)
169 <= 2 * add_cost (speed, mode)));
170 set_smod_pow2_cheap (speed, mode, (set_src_cost (&all->smod_32, speed)
171 <= 4 * add_cost (speed, mode)));
172
173 set_shift_cost (speed, mode, 0, 0);
174 {
175 int cost = add_cost (speed, mode);
176 set_shiftadd_cost (speed, mode, 0, cost);
177 set_shiftsub0_cost (speed, mode, 0, cost);
178 set_shiftsub1_cost (speed, mode, 0, cost);
179 }
180
181 n = MIN (MAX_BITS_PER_WORD, mode_bitsize);
182 for (m = 1; m < n; m++)
183 {
184 XEXP (&all->shift, 1) = all->cint[m];
185 XEXP (&all->shift_mult, 1) = all->pow2[m];
186
187 set_shift_cost (speed, mode, m, set_src_cost (&all->shift, speed));
188 set_shiftadd_cost (speed, mode, m, set_src_cost (&all->shift_add, speed));
189 set_shiftsub0_cost (speed, mode, m, set_src_cost (&all->shift_sub0, speed));
190 set_shiftsub1_cost (speed, mode, m, set_src_cost (&all->shift_sub1, speed));
191 }
192
193 if (SCALAR_INT_MODE_P (mode))
194 {
195 for (mode_from = MIN_MODE_INT; mode_from <= MAX_MODE_INT;
196 mode_from = (enum machine_mode)(mode_from + 1))
197 init_expmed_one_conv (all, mode, mode_from, speed);
198 }
199 if (GET_MODE_CLASS (mode) == MODE_INT)
200 {
201 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
202 if (wider_mode != VOIDmode)
203 {
204 PUT_MODE (&all->zext, wider_mode);
205 PUT_MODE (&all->wide_mult, wider_mode);
206 PUT_MODE (&all->wide_lshr, wider_mode);
207 XEXP (&all->wide_lshr, 1) = GEN_INT (mode_bitsize);
208
209 set_mul_widen_cost (speed, wider_mode,
210 set_src_cost (&all->wide_mult, speed));
211 set_mul_highpart_cost (speed, mode,
212 set_src_cost (&all->wide_trunc, speed));
213 }
214 }
215 }
216
217 void
218 init_expmed (void)
219 {
220 struct init_expmed_rtl all;
221 enum machine_mode mode;
222 int m, speed;
223
224 memset (&all, 0, sizeof all);
225 for (m = 1; m < MAX_BITS_PER_WORD; m++)
226 {
227 all.pow2[m] = GEN_INT ((HOST_WIDE_INT) 1 << m);
228 all.cint[m] = GEN_INT (m);
229 }
230
231 PUT_CODE (&all.reg, REG);
232 /* Avoid using hard regs in ways which may be unsupported. */
233 SET_REGNO (&all.reg, LAST_VIRTUAL_REGISTER + 1);
234
235 PUT_CODE (&all.plus, PLUS);
236 XEXP (&all.plus, 0) = &all.reg;
237 XEXP (&all.plus, 1) = &all.reg;
238
239 PUT_CODE (&all.neg, NEG);
240 XEXP (&all.neg, 0) = &all.reg;
241
242 PUT_CODE (&all.mult, MULT);
243 XEXP (&all.mult, 0) = &all.reg;
244 XEXP (&all.mult, 1) = &all.reg;
245
246 PUT_CODE (&all.sdiv, DIV);
247 XEXP (&all.sdiv, 0) = &all.reg;
248 XEXP (&all.sdiv, 1) = &all.reg;
249
250 PUT_CODE (&all.udiv, UDIV);
251 XEXP (&all.udiv, 0) = &all.reg;
252 XEXP (&all.udiv, 1) = &all.reg;
253
254 PUT_CODE (&all.sdiv_32, DIV);
255 XEXP (&all.sdiv_32, 0) = &all.reg;
256 XEXP (&all.sdiv_32, 1) = 32 < MAX_BITS_PER_WORD ? all.cint[32] : GEN_INT (32);
257
258 PUT_CODE (&all.smod_32, MOD);
259 XEXP (&all.smod_32, 0) = &all.reg;
260 XEXP (&all.smod_32, 1) = XEXP (&all.sdiv_32, 1);
261
262 PUT_CODE (&all.zext, ZERO_EXTEND);
263 XEXP (&all.zext, 0) = &all.reg;
264
265 PUT_CODE (&all.wide_mult, MULT);
266 XEXP (&all.wide_mult, 0) = &all.zext;
267 XEXP (&all.wide_mult, 1) = &all.zext;
268
269 PUT_CODE (&all.wide_lshr, LSHIFTRT);
270 XEXP (&all.wide_lshr, 0) = &all.wide_mult;
271
272 PUT_CODE (&all.wide_trunc, TRUNCATE);
273 XEXP (&all.wide_trunc, 0) = &all.wide_lshr;
274
275 PUT_CODE (&all.shift, ASHIFT);
276 XEXP (&all.shift, 0) = &all.reg;
277
278 PUT_CODE (&all.shift_mult, MULT);
279 XEXP (&all.shift_mult, 0) = &all.reg;
280
281 PUT_CODE (&all.shift_add, PLUS);
282 XEXP (&all.shift_add, 0) = &all.shift_mult;
283 XEXP (&all.shift_add, 1) = &all.reg;
284
285 PUT_CODE (&all.shift_sub0, MINUS);
286 XEXP (&all.shift_sub0, 0) = &all.shift_mult;
287 XEXP (&all.shift_sub0, 1) = &all.reg;
288
289 PUT_CODE (&all.shift_sub1, MINUS);
290 XEXP (&all.shift_sub1, 0) = &all.reg;
291 XEXP (&all.shift_sub1, 1) = &all.shift_mult;
292
293 PUT_CODE (&all.trunc, TRUNCATE);
294 XEXP (&all.trunc, 0) = &all.reg;
295
296 for (speed = 0; speed < 2; speed++)
297 {
298 crtl->maybe_hot_insn_p = speed;
299 set_zero_cost (speed, set_src_cost (const0_rtx, speed));
300
301 for (mode = MIN_MODE_INT; mode <= MAX_MODE_INT;
302 mode = (enum machine_mode)(mode + 1))
303 init_expmed_one_mode (&all, mode, speed);
304
305 if (MIN_MODE_PARTIAL_INT != VOIDmode)
306 for (mode = MIN_MODE_PARTIAL_INT; mode <= MAX_MODE_PARTIAL_INT;
307 mode = (enum machine_mode)(mode + 1))
308 init_expmed_one_mode (&all, mode, speed);
309
310 if (MIN_MODE_VECTOR_INT != VOIDmode)
311 for (mode = MIN_MODE_VECTOR_INT; mode <= MAX_MODE_VECTOR_INT;
312 mode = (enum machine_mode)(mode + 1))
313 init_expmed_one_mode (&all, mode, speed);
314 }
315
316 if (alg_hash_used_p ())
317 {
318 struct alg_hash_entry *p = alg_hash_entry_ptr (0);
319 memset (p, 0, sizeof (*p) * NUM_ALG_HASH_ENTRIES);
320 }
321 else
322 set_alg_hash_used_p (true);
323 default_rtl_profile ();
324 }
325
326 /* Return an rtx representing minus the value of X.
327 MODE is the intended mode of the result,
328 useful if X is a CONST_INT. */
329
330 rtx
331 negate_rtx (enum machine_mode mode, rtx x)
332 {
333 rtx result = simplify_unary_operation (NEG, mode, x, mode);
334
335 if (result == 0)
336 result = expand_unop (mode, neg_optab, x, NULL_RTX, 0);
337
338 return result;
339 }
340
341 /* Report on the availability of insv/extv/extzv and the desired mode
342 of each of their operands. Returns MAX_MACHINE_MODE if HAVE_foo
343 is false; else the mode of the specified operand. If OPNO is -1,
344 all the caller cares about is whether the insn is available. */
345 enum machine_mode
346 mode_for_extraction (enum extraction_pattern pattern, int opno)
347 {
348 const struct insn_data_d *data;
349
350 switch (pattern)
351 {
352 case EP_insv:
353 if (HAVE_insv)
354 {
355 data = &insn_data[CODE_FOR_insv];
356 break;
357 }
358 return MAX_MACHINE_MODE;
359
360 case EP_extv:
361 if (HAVE_extv)
362 {
363 data = &insn_data[CODE_FOR_extv];
364 break;
365 }
366 return MAX_MACHINE_MODE;
367
368 case EP_extzv:
369 if (HAVE_extzv)
370 {
371 data = &insn_data[CODE_FOR_extzv];
372 break;
373 }
374 return MAX_MACHINE_MODE;
375
376 default:
377 gcc_unreachable ();
378 }
379
380 if (opno == -1)
381 return VOIDmode;
382
383 /* Everyone who uses this function used to follow it with
384 if (result == VOIDmode) result = word_mode; */
385 if (data->operand[opno].mode == VOIDmode)
386 return word_mode;
387 return data->operand[opno].mode;
388 }
389
390 /* Return true if a bitfield of size BITSIZE at bit number BITNUM within
391 a structure of mode STRUCT_MODE represents a lowpart subreg. The subreg
392 offset is then BITNUM / BITS_PER_UNIT. */
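/* For example, with 32-bit words on a little-endian target, a field starting
   at bit 32 of a DImode value satisfies this test, and the corresponding
   subreg byte offset is 32 / BITS_PER_UNIT == 4.  */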
393
394 static bool
395 lowpart_bit_field_p (unsigned HOST_WIDE_INT bitnum,
396 unsigned HOST_WIDE_INT bitsize,
397 enum machine_mode struct_mode)
398 {
399 if (BYTES_BIG_ENDIAN)
400 return (bitnum % BITS_PER_UNIT == 0
401 && (bitnum + bitsize == GET_MODE_BITSIZE (struct_mode)
402 || (bitnum + bitsize) % BITS_PER_WORD == 0));
403 else
404 return bitnum % BITS_PER_WORD == 0;
405 }
406 \f
407 /* A subroutine of store_bit_field, with the same arguments. Return true
408 if the operation could be implemented.
409
410 If FALLBACK_P is true, fall back to store_fixed_bit_field if we have
411 no other way of implementing the operation. If FALLBACK_P is false,
412 return false instead. */
413
414 static bool
415 store_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
416 unsigned HOST_WIDE_INT bitnum,
417 unsigned HOST_WIDE_INT bitregion_start,
418 unsigned HOST_WIDE_INT bitregion_end,
419 enum machine_mode fieldmode,
420 rtx value, bool fallback_p)
421 {
422 rtx op0 = str_rtx;
423 rtx orig_value;
424
425 while (GET_CODE (op0) == SUBREG)
426 {
427 /* The following line once was done only if WORDS_BIG_ENDIAN,
428 but I think that is a mistake. WORDS_BIG_ENDIAN is
429 meaningful at a much higher level; when structures are copied
430 between memory and regs, the higher-numbered regs
431 always get higher addresses. */
432 int inner_mode_size = GET_MODE_SIZE (GET_MODE (SUBREG_REG (op0)));
433 int outer_mode_size = GET_MODE_SIZE (GET_MODE (op0));
434 int byte_offset = 0;
435
436 /* Paradoxical subregs need special handling on big endian machines. */
437 if (SUBREG_BYTE (op0) == 0 && inner_mode_size < outer_mode_size)
438 {
439 int difference = inner_mode_size - outer_mode_size;
440
441 if (WORDS_BIG_ENDIAN)
442 byte_offset += (difference / UNITS_PER_WORD) * UNITS_PER_WORD;
443 if (BYTES_BIG_ENDIAN)
444 byte_offset += difference % UNITS_PER_WORD;
445 }
446 else
447 byte_offset = SUBREG_BYTE (op0);
448
449 bitnum += byte_offset * BITS_PER_UNIT;
450 op0 = SUBREG_REG (op0);
451 }
452
453 /* No action is needed if the target is a register and if the field
454 lies completely outside that register. This can occur if the source
455 code contains an out-of-bounds access to a small array. */
456 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
457 return true;
458
459 /* Use vec_set patterns for inserting parts of vectors whenever
460 available. */
461 if (VECTOR_MODE_P (GET_MODE (op0))
462 && !MEM_P (op0)
463 && optab_handler (vec_set_optab, GET_MODE (op0)) != CODE_FOR_nothing
464 && fieldmode == GET_MODE_INNER (GET_MODE (op0))
465 && bitsize == GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
466 && !(bitnum % GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
467 {
468 struct expand_operand ops[3];
469 enum machine_mode outermode = GET_MODE (op0);
470 enum machine_mode innermode = GET_MODE_INNER (outermode);
471 enum insn_code icode = optab_handler (vec_set_optab, outermode);
472 int pos = bitnum / GET_MODE_BITSIZE (innermode);
473
474 create_fixed_operand (&ops[0], op0);
475 create_input_operand (&ops[1], value, innermode);
476 create_integer_operand (&ops[2], pos);
477 if (maybe_expand_insn (icode, 3, ops))
478 return true;
479 }
480
481 /* If the target is a register, then overwriting the entire object or storing
482 a full-word or multi-word field can be done with just a SUBREG. */
483 if (!MEM_P (op0)
484 && bitsize == GET_MODE_BITSIZE (fieldmode)
485 && ((bitsize == GET_MODE_BITSIZE (GET_MODE (op0)) && bitnum == 0)
486 || (bitsize % BITS_PER_WORD == 0 && bitnum % BITS_PER_WORD == 0)))
487 {
488 /* Use the subreg machinery either to narrow OP0 to the required
489 words or to cope with mode punning between equal-sized modes. */
490 rtx sub = simplify_gen_subreg (fieldmode, op0, GET_MODE (op0),
491 bitnum / BITS_PER_UNIT);
492 if (sub)
493 {
494 emit_move_insn (sub, value);
495 return true;
496 }
497 }
498
499 /* If the target is memory, storing any naturally aligned field can be
500 done with a simple store. For targets that support fast unaligned
501 memory access, any naturally sized, unit-aligned field can be stored directly. */
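/* For example, a 32-bit SImode field starting at bit 64 of a MEM whose
   MEM_ALIGN is 32 qualifies even on strict-alignment targets: the field is
   byte aligned, 64 % 32 == 0, and the alignment is a multiple of the field
   size.  */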
502 if (MEM_P (op0)
503 && bitnum % BITS_PER_UNIT == 0
504 && bitsize == GET_MODE_BITSIZE (fieldmode)
505 && (!SLOW_UNALIGNED_ACCESS (fieldmode, MEM_ALIGN (op0))
506 || (bitnum % bitsize == 0
507 && MEM_ALIGN (op0) % bitsize == 0)))
508 {
509 op0 = adjust_bitfield_address (op0, fieldmode, bitnum / BITS_PER_UNIT);
510 emit_move_insn (op0, value);
511 return true;
512 }
513
514 /* Make sure we are playing with integral modes. Pun with subregs
515 if we aren't. This must come after the entire register case above,
516 since that case is valid for any mode. The following cases are only
517 valid for integral modes. */
518 {
519 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
520 if (imode != GET_MODE (op0))
521 {
522 if (MEM_P (op0))
523 op0 = adjust_bitfield_address (op0, imode, 0);
524 else
525 {
526 gcc_assert (imode != BLKmode);
527 op0 = gen_lowpart (imode, op0);
528 }
529 }
530 }
531
532 /* Storing an lsb-aligned field in a register
533 can be done with a movstrict instruction. */
534
535 if (!MEM_P (op0)
536 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
537 && bitsize == GET_MODE_BITSIZE (fieldmode)
538 && optab_handler (movstrict_optab, fieldmode) != CODE_FOR_nothing)
539 {
540 struct expand_operand ops[2];
541 enum insn_code icode = optab_handler (movstrict_optab, fieldmode);
542 rtx arg0 = op0;
543 unsigned HOST_WIDE_INT subreg_off;
544
545 if (GET_CODE (arg0) == SUBREG)
546 {
547 /* Else we've got some float mode source being extracted into
548 a different float mode destination -- this combination of
549 subregs results in Severe Tire Damage. */
550 gcc_assert (GET_MODE (SUBREG_REG (arg0)) == fieldmode
551 || GET_MODE_CLASS (fieldmode) == MODE_INT
552 || GET_MODE_CLASS (fieldmode) == MODE_PARTIAL_INT);
553 arg0 = SUBREG_REG (arg0);
554 }
555
556 subreg_off = bitnum / BITS_PER_UNIT;
557 if (validate_subreg (fieldmode, GET_MODE (arg0), arg0, subreg_off))
558 {
559 arg0 = gen_rtx_SUBREG (fieldmode, arg0, subreg_off);
560
561 create_fixed_operand (&ops[0], arg0);
562 /* Shrink the source operand to FIELDMODE. */
563 create_convert_operand_to (&ops[1], value, fieldmode, false);
564 if (maybe_expand_insn (icode, 2, ops))
565 return true;
566 }
567 }
568
569 /* Handle fields bigger than a word. */
570
571 if (bitsize > BITS_PER_WORD)
572 {
573 /* Here we transfer the words of the field
574 in the order least significant first.
575 This is because the most significant word is the one which may
576 be less than full.
577 However, only do that if the value is not BLKmode. */
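/* For example, on a 32-bit little-endian target a 48-bit field is written as
   a full low-order word followed by a 16-bit piece, so only the final, most
   significant piece can be partial.  */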
578
579 unsigned int backwards = WORDS_BIG_ENDIAN && fieldmode != BLKmode;
580 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
581 unsigned int i;
582 rtx last;
583
584 /* This is the mode we must force value to, so that there will be enough
585 subwords to extract. Note that fieldmode will often (always?) be
586 VOIDmode, because that is what store_field uses to indicate that this
587 is a bit field, but passing VOIDmode to operand_subword_force
588 is not allowed. */
589 fieldmode = GET_MODE (value);
590 if (fieldmode == VOIDmode)
591 fieldmode = smallest_mode_for_size (nwords * BITS_PER_WORD, MODE_INT);
592
593 last = get_last_insn ();
594 for (i = 0; i < nwords; i++)
595 {
596 /* If I is 0, use the low-order word in both field and target;
597 if I is 1, use the next to lowest word; and so on. */
598 unsigned int wordnum = (backwards
599 ? GET_MODE_SIZE (fieldmode) / UNITS_PER_WORD
600 - i - 1
601 : i);
602 unsigned int bit_offset = (backwards
603 ? MAX ((int) bitsize - ((int) i + 1)
604 * BITS_PER_WORD,
605 0)
606 : (int) i * BITS_PER_WORD);
607 rtx value_word = operand_subword_force (value, wordnum, fieldmode);
608 unsigned HOST_WIDE_INT new_bitsize =
609 MIN (BITS_PER_WORD, bitsize - i * BITS_PER_WORD);
610
611 /* If the remaining chunk doesn't have a full wordsize, we have
612 to make sure that for big-endian machines the higher-order
613 bits are used. */
614 if (new_bitsize < BITS_PER_WORD && BYTES_BIG_ENDIAN && !backwards)
615 value_word = simplify_expand_binop (word_mode, lshr_optab,
616 value_word,
617 GEN_INT (BITS_PER_WORD
618 - new_bitsize),
619 NULL_RTX, true,
620 OPTAB_LIB_WIDEN);
621
622 if (!store_bit_field_1 (op0, new_bitsize,
623 bitnum + bit_offset,
624 bitregion_start, bitregion_end,
625 word_mode,
626 value_word, fallback_p))
627 {
628 delete_insns_since (last);
629 return false;
630 }
631 }
632 return true;
633 }
634
635 /* If VALUE has a floating-point or complex mode, access it as an
636 integer of the corresponding size. This can occur on a machine
637 with 64 bit registers that uses SFmode for float. It can also
638 occur for unaligned float or complex fields. */
639 orig_value = value;
640 if (GET_MODE (value) != VOIDmode
641 && GET_MODE_CLASS (GET_MODE (value)) != MODE_INT
642 && GET_MODE_CLASS (GET_MODE (value)) != MODE_PARTIAL_INT)
643 {
644 value = gen_reg_rtx (int_mode_for_mode (GET_MODE (value)));
645 emit_move_insn (gen_lowpart (GET_MODE (orig_value), value), orig_value);
646 }
647
648 /* If OP0 is a multi-word register, narrow it to the affected word.
649 If the region spans two words, defer to store_split_bit_field. */
650 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
651 {
652 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
653 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
654 gcc_assert (op0);
655 bitnum %= BITS_PER_WORD;
656 if (bitnum + bitsize > BITS_PER_WORD)
657 {
658 if (!fallback_p)
659 return false;
660
661 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
662 bitregion_end, value);
663 return true;
664 }
665 }
666
667 /* From here on we can assume that the field to be stored in fits
668 within a word. If the destination is a register, it too fits
669 in a word. */
670
671 enum machine_mode op_mode = mode_for_extraction (EP_insv, 3);
672 if (HAVE_insv
673 && GET_MODE (value) != BLKmode
674 && bitsize > 0
675 && GET_MODE_BITSIZE (op_mode) >= bitsize
676 /* Do not use insv for volatile bitfields when
677 -fstrict-volatile-bitfields is in effect. */
678 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
679 && flag_strict_volatile_bitfields > 0)
680 /* Do not use insv if the bit region is restricted and
681 op_mode integer at offset doesn't fit into the
682 restricted region. */
683 && !(MEM_P (op0) && bitregion_end
684 && bitnum - (bitnum % BITS_PER_UNIT) + GET_MODE_BITSIZE (op_mode)
685 > bitregion_end + 1))
686 {
687 struct expand_operand ops[4];
688 unsigned HOST_WIDE_INT bitpos = bitnum;
689 rtx value1;
690 rtx xop0 = op0;
691 rtx last = get_last_insn ();
692 bool copy_back = false;
693
694 unsigned int unit = GET_MODE_BITSIZE (op_mode);
695 if (MEM_P (xop0))
696 {
697 /* Get a reference to the first byte of the field. */
698 xop0 = adjust_bitfield_address (xop0, byte_mode,
699 bitpos / BITS_PER_UNIT);
700 bitpos %= BITS_PER_UNIT;
701 }
702 else
703 {
704 /* Convert from counting within OP0 to counting in OP_MODE. */
705 if (BYTES_BIG_ENDIAN)
706 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
707 }
708
709 /* If xop0 is a register, we need it in OP_MODE
710 to make it acceptable to the format of insv. */
711 if (GET_CODE (xop0) == SUBREG)
712 /* We can't just change the mode, because this might clobber op0,
713 and we will need the original value of op0 if insv fails. */
714 xop0 = gen_rtx_SUBREG (op_mode, SUBREG_REG (xop0), SUBREG_BYTE (xop0));
715 if (REG_P (xop0) && GET_MODE (xop0) != op_mode)
716 xop0 = gen_lowpart_SUBREG (op_mode, xop0);
717
718 /* If the destination is a paradoxical subreg such that we need a
719 truncate to the inner mode, perform the insertion on a temporary and
720 truncate the result to the original destination. Note that we can't
721 just truncate the paradoxical subreg as (truncate:N (subreg:W (reg:N
722 X) 0)) is (reg:N X). */
723 if (GET_CODE (xop0) == SUBREG
724 && REG_P (SUBREG_REG (xop0))
725 && (!TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (SUBREG_REG (xop0)),
726 op_mode)))
727 {
728 rtx tem = gen_reg_rtx (op_mode);
729 emit_move_insn (tem, xop0);
730 xop0 = tem;
731 copy_back = true;
732 }
733
734 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
735 "backwards" from the size of the unit we are inserting into.
736 Otherwise, we count bits from the most significant on a
737 BYTES/BITS_BIG_ENDIAN machine. */
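/* For example, with a 32-bit unit, an 8-bit field at bitpos 4 becomes
   bitpos 32 - 8 - 4 == 20 below when the two conventions differ.  */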
738
739 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
740 bitpos = unit - bitsize - bitpos;
741
742 /* Convert VALUE to op_mode (which insv insn wants) in VALUE1. */
743 value1 = value;
744 if (GET_MODE (value) != op_mode)
745 {
746 if (GET_MODE_BITSIZE (GET_MODE (value)) >= bitsize)
747 {
748 /* Optimization: Don't bother really extending VALUE
749 if it has all the bits we will actually use. However,
750 if we must narrow it, be sure we do it correctly. */
751
752 if (GET_MODE_SIZE (GET_MODE (value)) < GET_MODE_SIZE (op_mode))
753 {
754 rtx tmp;
755
756 tmp = simplify_subreg (op_mode, value1, GET_MODE (value), 0);
757 if (! tmp)
758 tmp = simplify_gen_subreg (op_mode,
759 force_reg (GET_MODE (value),
760 value1),
761 GET_MODE (value), 0);
762 value1 = tmp;
763 }
764 else
765 value1 = gen_lowpart (op_mode, value1);
766 }
767 else if (CONST_INT_P (value))
768 value1 = gen_int_mode (INTVAL (value), op_mode);
769 else
770 /* Parse phase is supposed to make VALUE's data type
771 match that of the component reference, which is a type
772 at least as wide as the field; so VALUE should have
773 a mode that corresponds to that type. */
774 gcc_assert (CONSTANT_P (value));
775 }
776
777 create_fixed_operand (&ops[0], xop0);
778 create_integer_operand (&ops[1], bitsize);
779 create_integer_operand (&ops[2], bitpos);
780 create_input_operand (&ops[3], value1, op_mode);
781 if (maybe_expand_insn (CODE_FOR_insv, 4, ops))
782 {
783 if (copy_back)
784 convert_move (op0, xop0, true);
785 return true;
786 }
787 delete_insns_since (last);
788 }
789
790 /* If OP0 is a memory, try copying it to a register and seeing if a
791 cheap register alternative is available. */
792 if (HAVE_insv && MEM_P (op0))
793 {
794 enum machine_mode bestmode;
795 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
796
797 if (bitregion_end)
798 maxbits = bitregion_end - bitregion_start + 1;
799
800 /* Get the mode to use for inserting into this field. If OP0 is
801 BLKmode, get the smallest mode consistent with the alignment. If
802 OP0 is a non-BLKmode object that is no wider than OP_MODE, use its
803 mode. Otherwise, use the smallest mode containing the field. */
804
805 if (GET_MODE (op0) == BLKmode
806 || GET_MODE_BITSIZE (GET_MODE (op0)) > maxbits
807 || (op_mode != MAX_MACHINE_MODE
808 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (op_mode)))
809 bestmode = get_best_mode (bitsize, bitnum,
810 bitregion_start, bitregion_end,
811 MEM_ALIGN (op0),
812 (op_mode == MAX_MACHINE_MODE
813 ? VOIDmode : op_mode),
814 MEM_VOLATILE_P (op0));
815 else
816 bestmode = GET_MODE (op0);
817
818 if (bestmode != VOIDmode
819 && GET_MODE_SIZE (bestmode) >= GET_MODE_SIZE (fieldmode)
820 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
821 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
822 {
823 rtx last, tempreg, xop0;
824 unsigned int unit;
825 unsigned HOST_WIDE_INT offset, bitpos;
826
827 last = get_last_insn ();
828
829 /* Adjust address to point to the containing unit of
830 that mode. Compute the offset as a multiple of this unit,
831 counting in bytes. */
832 unit = GET_MODE_BITSIZE (bestmode);
833 offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
834 bitpos = bitnum % unit;
835 xop0 = adjust_bitfield_address (op0, bestmode, offset);
836
837 /* Fetch that unit, store the bitfield in it, then store
838 the unit. */
839 tempreg = copy_to_reg (xop0);
840 if (store_bit_field_1 (tempreg, bitsize, bitpos,
841 bitregion_start, bitregion_end,
842 fieldmode, orig_value, false))
843 {
844 emit_move_insn (xop0, tempreg);
845 return true;
846 }
847 delete_insns_since (last);
848 }
849 }
850
851 if (!fallback_p)
852 return false;
853
854 store_fixed_bit_field (op0, bitsize, bitnum, bitregion_start,
855 bitregion_end, value);
856 return true;
857 }
858
859 /* Generate code to store value from rtx VALUE
860 into a bit-field within structure STR_RTX
861 containing BITSIZE bits starting at bit BITNUM.
862
863 BITREGION_START is the bitpos of the first bitfield in this region.
864 BITREGION_END is the bitpos of the ending bitfield in this region.
865 These two fields are 0 if the C++ memory model does not apply,
866 or if we are not interested in keeping track of bitfield regions.
867
868 FIELDMODE is the machine-mode of the FIELD_DECL node for this field. */
869
870 void
871 store_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
872 unsigned HOST_WIDE_INT bitnum,
873 unsigned HOST_WIDE_INT bitregion_start,
874 unsigned HOST_WIDE_INT bitregion_end,
875 enum machine_mode fieldmode,
876 rtx value)
877 {
878 /* Under the C++0x memory model, we must not touch bits outside the
879 bit region. Adjust the address to start at the beginning of the
880 bit region. */
881 if (MEM_P (str_rtx) && bitregion_start > 0)
882 {
883 enum machine_mode bestmode;
884 enum machine_mode op_mode;
885 unsigned HOST_WIDE_INT offset;
886
887 op_mode = mode_for_extraction (EP_insv, 3);
888 if (op_mode == MAX_MACHINE_MODE)
889 op_mode = VOIDmode;
890
891 gcc_assert ((bitregion_start % BITS_PER_UNIT) == 0);
892
893 offset = bitregion_start / BITS_PER_UNIT;
894 bitnum -= bitregion_start;
895 bitregion_end -= bitregion_start;
896 bitregion_start = 0;
897 bestmode = get_best_mode (bitsize, bitnum,
898 bitregion_start, bitregion_end,
899 MEM_ALIGN (str_rtx),
900 op_mode,
901 MEM_VOLATILE_P (str_rtx));
902 str_rtx = adjust_address (str_rtx, bestmode, offset);
903 }
904
905 if (!store_bit_field_1 (str_rtx, bitsize, bitnum,
906 bitregion_start, bitregion_end,
907 fieldmode, value, true))
908 gcc_unreachable ();
909 }
910 \f
911 /* Use shifts and boolean operations to store VALUE into a bit field of
912 width BITSIZE in OP0, starting at bit BITNUM. */
913
914 static void
915 store_fixed_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
916 unsigned HOST_WIDE_INT bitnum,
917 unsigned HOST_WIDE_INT bitregion_start,
918 unsigned HOST_WIDE_INT bitregion_end,
919 rtx value)
920 {
921 enum machine_mode mode;
922 rtx temp;
923 int all_zero = 0;
924 int all_one = 0;
925
926 /* There is a case not handled here:
927 a structure with a known alignment of just a halfword
928 and a field split across two aligned halfwords within the structure.
929 Or likewise a structure with a known alignment of just a byte
930 and a field split across two bytes.
931 Such cases are not supposed to be able to occur. */
932
933 if (MEM_P (op0))
934 {
935 unsigned HOST_WIDE_INT maxbits = MAX_FIXED_MODE_SIZE;
936
937 if (bitregion_end)
938 maxbits = bitregion_end - bitregion_start + 1;
939
940 /* Get the proper mode to use for this field. We want a mode that
941 includes the entire field. If such a mode would be larger than
942 a word, we won't be doing the store the normal way.
943 We don't want a mode bigger than the destination. */
944
945 mode = GET_MODE (op0);
946 if (GET_MODE_BITSIZE (mode) == 0
947 || GET_MODE_BITSIZE (mode) > GET_MODE_BITSIZE (word_mode))
948 mode = word_mode;
949
950 if (MEM_VOLATILE_P (op0)
951 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
952 && GET_MODE_BITSIZE (GET_MODE (op0)) <= maxbits
953 && flag_strict_volatile_bitfields > 0)
954 mode = GET_MODE (op0);
955 else
956 mode = get_best_mode (bitsize, bitnum, bitregion_start, bitregion_end,
957 MEM_ALIGN (op0), mode, MEM_VOLATILE_P (op0));
958
959 if (mode == VOIDmode)
960 {
961 /* The only way this should occur is if the field spans word
962 boundaries. */
963 store_split_bit_field (op0, bitsize, bitnum, bitregion_start,
964 bitregion_end, value);
965 return;
966 }
967
968 HOST_WIDE_INT bit_offset = bitnum - bitnum % GET_MODE_BITSIZE (mode);
969 op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
970 bitnum -= bit_offset;
971 }
972
973 mode = GET_MODE (op0);
974 gcc_assert (SCALAR_INT_MODE_P (mode));
975
976 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
977 for invalid input, such as f5 from gcc.dg/pr48335-2.c. */
978
979 if (BYTES_BIG_ENDIAN)
980 /* BITNUM is the distance between our msb
981 and that of the containing datum.
982 Convert it to the distance from the lsb. */
983 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
984
985 /* Now BITNUM is always the distance between our lsb
986 and that of OP0. */
987
988 /* Shift VALUE left by BITNUM bits. If VALUE is not constant,
989 we must first convert its mode to MODE. */
990
991 if (CONST_INT_P (value))
992 {
993 HOST_WIDE_INT v = INTVAL (value);
994
995 if (bitsize < HOST_BITS_PER_WIDE_INT)
996 v &= ((HOST_WIDE_INT) 1 << bitsize) - 1;
997
998 if (v == 0)
999 all_zero = 1;
1000 else if ((bitsize < HOST_BITS_PER_WIDE_INT
1001 && v == ((HOST_WIDE_INT) 1 << bitsize) - 1)
1002 || (bitsize == HOST_BITS_PER_WIDE_INT && v == -1))
1003 all_one = 1;
1004
1005 value = lshift_value (mode, value, bitnum, bitsize);
1006 }
1007 else
1008 {
1009 int must_and = (GET_MODE_BITSIZE (GET_MODE (value)) != bitsize
1010 && bitnum + bitsize != GET_MODE_BITSIZE (mode));
1011
1012 if (GET_MODE (value) != mode)
1013 value = convert_to_mode (mode, value, 1);
1014
1015 if (must_and)
1016 value = expand_binop (mode, and_optab, value,
1017 mask_rtx (mode, 0, bitsize, 0),
1018 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1019 if (bitnum > 0)
1020 value = expand_shift (LSHIFT_EXPR, mode, value,
1021 bitnum, NULL_RTX, 1);
1022 }
1023
1024 /* Now clear the chosen bits in OP0,
1025 except that if VALUE is -1 we need not bother. */
1026 /* We keep the intermediates in registers to allow CSE to combine
1027 consecutive bitfield assignments. */
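/* Illustration: for an 8-bit field at (lsb-relative) bit 4 of a 32-bit word,
   the sequence below computes op0 = (op0 & ~(0xff << 4)) | ((value & 0xff) << 4),
   skipping the AND when VALUE is known to be all ones and the IOR when it is
   zero.  */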
1028
1029 temp = force_reg (mode, op0);
1030
1031 if (! all_one)
1032 {
1033 temp = expand_binop (mode, and_optab, temp,
1034 mask_rtx (mode, bitnum, bitsize, 1),
1035 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1036 temp = force_reg (mode, temp);
1037 }
1038
1039 /* Now logical-or VALUE into OP0, unless it is zero. */
1040
1041 if (! all_zero)
1042 {
1043 temp = expand_binop (mode, ior_optab, temp, value,
1044 NULL_RTX, 1, OPTAB_LIB_WIDEN);
1045 temp = force_reg (mode, temp);
1046 }
1047
1048 if (op0 != temp)
1049 {
1050 op0 = copy_rtx (op0);
1051 emit_move_insn (op0, temp);
1052 }
1053 }
1054 \f
1055 /* Store a bit field that is split across multiple accessible memory objects.
1056
1057 OP0 is the REG, SUBREG or MEM rtx for the first of the objects.
1058 BITSIZE is the field width; BITPOS the position of its first bit
1059 (within the word).
1060 VALUE is the value to store.
1061
1062 This does not yet handle fields wider than BITS_PER_WORD. */
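/* For example, with 32-bit words a 20-bit field starting at bit 28 of a
   register is stored as a 4-bit piece in word 0 followed by a 16-bit piece in
   word 1.  */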
1063
1064 static void
1065 store_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1066 unsigned HOST_WIDE_INT bitpos,
1067 unsigned HOST_WIDE_INT bitregion_start,
1068 unsigned HOST_WIDE_INT bitregion_end,
1069 rtx value)
1070 {
1071 unsigned int unit;
1072 unsigned int bitsdone = 0;
1073
1074 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1075 much at a time. */
1076 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1077 unit = BITS_PER_WORD;
1078 else
1079 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1080
1081 /* If VALUE is a constant other than a CONST_INT, get it into a register in
1082 WORD_MODE. If we can do this using gen_lowpart_common, do so. Note
1083 that VALUE might be a floating-point constant. */
1084 if (CONSTANT_P (value) && !CONST_INT_P (value))
1085 {
1086 rtx word = gen_lowpart_common (word_mode, value);
1087
1088 if (word && (value != word))
1089 value = word;
1090 else
1091 value = gen_lowpart_common (word_mode,
1092 force_reg (GET_MODE (value) != VOIDmode
1093 ? GET_MODE (value)
1094 : word_mode, value));
1095 }
1096
1097 while (bitsdone < bitsize)
1098 {
1099 unsigned HOST_WIDE_INT thissize;
1100 rtx part, word;
1101 unsigned HOST_WIDE_INT thispos;
1102 unsigned HOST_WIDE_INT offset;
1103
1104 offset = (bitpos + bitsdone) / unit;
1105 thispos = (bitpos + bitsdone) % unit;
1106
1107 /* When the region of bytes we can touch is restricted, decrease
1108 UNIT close to the end of the region as needed. */
1109 if (bitregion_end
1110 && unit > BITS_PER_UNIT
1111 && bitpos + bitsdone - thispos + unit > bitregion_end + 1)
1112 {
1113 unit = unit / 2;
1114 continue;
1115 }
1116
1117 /* THISSIZE must not overrun a word boundary. Otherwise,
1118 store_fixed_bit_field will call us again, and we will mutually
1119 recurse forever. */
1120 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1121 thissize = MIN (thissize, unit - thispos);
1122
1123 if (BYTES_BIG_ENDIAN)
1124 {
1125 /* Fetch successively less significant portions. */
1126 if (CONST_INT_P (value))
1127 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1128 >> (bitsize - bitsdone - thissize))
1129 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1130 else
1131 {
1132 int total_bits = GET_MODE_BITSIZE (GET_MODE (value));
1133 /* The args are chosen so that the last part includes the
1134 lsb. Give extract_bit_field the value it needs (with
1135 endianness compensation) to fetch the piece we want. */
1136 part = extract_fixed_bit_field (word_mode, value, thissize,
1137 total_bits - bitsize + bitsdone,
1138 NULL_RTX, 1, false);
1139 }
1140 }
1141 else
1142 {
1143 /* Fetch successively more significant portions. */
1144 if (CONST_INT_P (value))
1145 part = GEN_INT (((unsigned HOST_WIDE_INT) (INTVAL (value))
1146 >> bitsdone)
1147 & (((HOST_WIDE_INT) 1 << thissize) - 1));
1148 else
1149 part = extract_fixed_bit_field (word_mode, value, thissize,
1150 bitsdone, NULL_RTX, 1, false);
1151 }
1152
1153 /* If OP0 is a register, then handle OFFSET here.
1154
1155 When handling multiword bitfields, extract_bit_field may pass
1156 down a word_mode SUBREG of a larger REG for a bitfield that actually
1157 crosses a word boundary. Thus, for a SUBREG, we must find
1158 the current word starting from the base register. */
1159 if (GET_CODE (op0) == SUBREG)
1160 {
1161 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1162 enum machine_mode sub_mode = GET_MODE (SUBREG_REG (op0));
1163 if (sub_mode != BLKmode && GET_MODE_SIZE (sub_mode) < UNITS_PER_WORD)
1164 word = word_offset ? const0_rtx : op0;
1165 else
1166 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1167 GET_MODE (SUBREG_REG (op0)));
1168 offset = 0;
1169 }
1170 else if (REG_P (op0))
1171 {
1172 enum machine_mode op0_mode = GET_MODE (op0);
1173 if (op0_mode != BLKmode && GET_MODE_SIZE (op0_mode) < UNITS_PER_WORD)
1174 word = offset ? const0_rtx : op0;
1175 else
1176 word = operand_subword_force (op0, offset, GET_MODE (op0));
1177 offset = 0;
1178 }
1179 else
1180 word = op0;
1181
1182 /* OFFSET is in UNITs, and UNIT is in bits. If WORD is const0_rtx,
1183 it is just an out-of-bounds access. Ignore it. */
1184 if (word != const0_rtx)
1185 store_fixed_bit_field (word, thissize, offset * unit + thispos,
1186 bitregion_start, bitregion_end, part);
1187 bitsdone += thissize;
1188 }
1189 }
1190 \f
1191 /* A subroutine of extract_bit_field_1 that converts return value X
1192 to either MODE or TMODE. MODE, TMODE and UNSIGNEDP are arguments
1193 to extract_bit_field. */
1194
1195 static rtx
1196 convert_extracted_bit_field (rtx x, enum machine_mode mode,
1197 enum machine_mode tmode, bool unsignedp)
1198 {
1199 if (GET_MODE (x) == tmode || GET_MODE (x) == mode)
1200 return x;
1201
1202 /* If the x mode is not a scalar integral, first convert to the
1203 integer mode of that size and then access it as a floating-point
1204 value via a SUBREG. */
1205 if (!SCALAR_INT_MODE_P (tmode))
1206 {
1207 enum machine_mode smode;
1208
1209 smode = mode_for_size (GET_MODE_BITSIZE (tmode), MODE_INT, 0);
1210 x = convert_to_mode (smode, x, unsignedp);
1211 x = force_reg (smode, x);
1212 return gen_lowpart (tmode, x);
1213 }
1214
1215 return convert_to_mode (tmode, x, unsignedp);
1216 }
1217
1218 /* A subroutine of extract_bit_field, with the same arguments.
1219 If FALLBACK_P is true, fall back to extract_fixed_bit_field
1220 if we can find no other means of implementing the operation.
1221 If FALLBACK_P is false, return NULL instead. */
1222
1223 static rtx
1224 extract_bit_field_1 (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1225 unsigned HOST_WIDE_INT bitnum,
1226 int unsignedp, bool packedp, rtx target,
1227 enum machine_mode mode, enum machine_mode tmode,
1228 bool fallback_p)
1229 {
1230 rtx op0 = str_rtx;
1231 enum machine_mode int_mode;
1232 enum machine_mode ext_mode;
1233 enum machine_mode mode1;
1234
1235 if (tmode == VOIDmode)
1236 tmode = mode;
1237
1238 while (GET_CODE (op0) == SUBREG)
1239 {
1240 bitnum += SUBREG_BYTE (op0) * BITS_PER_UNIT;
1241 op0 = SUBREG_REG (op0);
1242 }
1243
1244 /* If we have an out-of-bounds access to a register, just return an
1245 uninitialized register of the required mode. This can occur if the
1246 source code contains an out-of-bounds access to a small array. */
1247 if (REG_P (op0) && bitnum >= GET_MODE_BITSIZE (GET_MODE (op0)))
1248 return gen_reg_rtx (tmode);
1249
1250 if (REG_P (op0)
1251 && mode == GET_MODE (op0)
1252 && bitnum == 0
1253 && bitsize == GET_MODE_BITSIZE (GET_MODE (op0)))
1254 {
1255 /* We're trying to extract a full register from itself. */
1256 return op0;
1257 }
1258
1259 /* See if we can get a better vector mode before extracting. */
1260 if (VECTOR_MODE_P (GET_MODE (op0))
1261 && !MEM_P (op0)
1262 && GET_MODE_INNER (GET_MODE (op0)) != tmode)
1263 {
1264 enum machine_mode new_mode;
1265
1266 if (GET_MODE_CLASS (tmode) == MODE_FLOAT)
1267 new_mode = MIN_MODE_VECTOR_FLOAT;
1268 else if (GET_MODE_CLASS (tmode) == MODE_FRACT)
1269 new_mode = MIN_MODE_VECTOR_FRACT;
1270 else if (GET_MODE_CLASS (tmode) == MODE_UFRACT)
1271 new_mode = MIN_MODE_VECTOR_UFRACT;
1272 else if (GET_MODE_CLASS (tmode) == MODE_ACCUM)
1273 new_mode = MIN_MODE_VECTOR_ACCUM;
1274 else if (GET_MODE_CLASS (tmode) == MODE_UACCUM)
1275 new_mode = MIN_MODE_VECTOR_UACCUM;
1276 else
1277 new_mode = MIN_MODE_VECTOR_INT;
1278
1279 for (; new_mode != VOIDmode ; new_mode = GET_MODE_WIDER_MODE (new_mode))
1280 if (GET_MODE_SIZE (new_mode) == GET_MODE_SIZE (GET_MODE (op0))
1281 && targetm.vector_mode_supported_p (new_mode))
1282 break;
1283 if (new_mode != VOIDmode)
1284 op0 = gen_lowpart (new_mode, op0);
1285 }
1286
1287 /* Use vec_extract patterns for extracting parts of vectors whenever
1288 available. */
1289 if (VECTOR_MODE_P (GET_MODE (op0))
1290 && !MEM_P (op0)
1291 && optab_handler (vec_extract_optab, GET_MODE (op0)) != CODE_FOR_nothing
1292 && ((bitnum + bitsize - 1) / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))
1293 == bitnum / GET_MODE_BITSIZE (GET_MODE_INNER (GET_MODE (op0)))))
1294 {
1295 struct expand_operand ops[3];
1296 enum machine_mode outermode = GET_MODE (op0);
1297 enum machine_mode innermode = GET_MODE_INNER (outermode);
1298 enum insn_code icode = optab_handler (vec_extract_optab, outermode);
1299 unsigned HOST_WIDE_INT pos = bitnum / GET_MODE_BITSIZE (innermode);
1300
1301 create_output_operand (&ops[0], target, innermode);
1302 create_input_operand (&ops[1], op0, outermode);
1303 create_integer_operand (&ops[2], pos);
1304 if (maybe_expand_insn (icode, 3, ops))
1305 {
1306 target = ops[0].value;
1307 if (GET_MODE (target) != mode)
1308 return gen_lowpart (tmode, target);
1309 return target;
1310 }
1311 }
1312
1313 /* Make sure we are playing with integral modes. Pun with subregs
1314 if we aren't. */
1315 {
1316 enum machine_mode imode = int_mode_for_mode (GET_MODE (op0));
1317 if (imode != GET_MODE (op0))
1318 {
1319 if (MEM_P (op0))
1320 op0 = adjust_bitfield_address (op0, imode, 0);
1321 else if (imode != BLKmode)
1322 {
1323 op0 = gen_lowpart (imode, op0);
1324
1325 /* If we got a SUBREG, force it into a register since we
1326 aren't going to be able to do another SUBREG on it. */
1327 if (GET_CODE (op0) == SUBREG)
1328 op0 = force_reg (imode, op0);
1329 }
1330 else if (REG_P (op0))
1331 {
1332 rtx reg, subreg;
1333 imode = smallest_mode_for_size (GET_MODE_BITSIZE (GET_MODE (op0)),
1334 MODE_INT);
1335 reg = gen_reg_rtx (imode);
1336 subreg = gen_lowpart_SUBREG (GET_MODE (op0), reg);
1337 emit_move_insn (subreg, op0);
1338 op0 = reg;
1339 bitnum += SUBREG_BYTE (subreg) * BITS_PER_UNIT;
1340 }
1341 else
1342 {
1343 rtx mem = assign_stack_temp (GET_MODE (op0),
1344 GET_MODE_SIZE (GET_MODE (op0)));
1345 emit_move_insn (mem, op0);
1346 op0 = adjust_bitfield_address (mem, BLKmode, 0);
1347 }
1348 }
1349 }
1350
1351 /* ??? We currently assume TARGET is at least as big as BITSIZE.
1352 If that's wrong, the solution is to test for it and set TARGET to 0
1353 if needed. */
1354
1355 /* If the bitfield is volatile, we need to make sure the access
1356 remains on a type-aligned boundary. */
1357 if (GET_CODE (op0) == MEM
1358 && MEM_VOLATILE_P (op0)
1359 && GET_MODE_BITSIZE (GET_MODE (op0)) > 0
1360 && flag_strict_volatile_bitfields > 0)
1361 goto no_subreg_mode_swap;
1362
1363 /* Only scalar integer modes can be converted via subregs. There is an
1364 additional problem for FP modes here in that they can have a precision
1365 which is different from the size. mode_for_size uses precision, but
1366 we want a mode based on the size, so we must avoid calling it for FP
1367 modes. */
1368 mode1 = mode;
1369 if (SCALAR_INT_MODE_P (tmode))
1370 {
1371 enum machine_mode try_mode = mode_for_size (bitsize,
1372 GET_MODE_CLASS (tmode), 0);
1373 if (try_mode != BLKmode)
1374 mode1 = try_mode;
1375 }
1376 gcc_assert (mode1 != BLKmode);
1377
1378 /* Extraction of a full MODE1 value can be done with a subreg as long
1379 as the least significant bit of the value is the least significant
1380 bit of either OP0 or a word of OP0. */
1381 if (!MEM_P (op0)
1382 && lowpart_bit_field_p (bitnum, bitsize, GET_MODE (op0))
1383 && bitsize == GET_MODE_BITSIZE (mode1)
1384 && TRULY_NOOP_TRUNCATION_MODES_P (mode1, GET_MODE (op0)))
1385 {
1386 rtx sub = simplify_gen_subreg (mode1, op0, GET_MODE (op0),
1387 bitnum / BITS_PER_UNIT);
1388 if (sub)
1389 return convert_extracted_bit_field (sub, mode, tmode, unsignedp);
1390 }
1391
1392 /* Extraction of a full MODE1 value can be done with a load as long as
1393 the field is on a byte boundary and is sufficiently aligned. */
1394 if (MEM_P (op0)
1395 && bitnum % BITS_PER_UNIT == 0
1396 && bitsize == GET_MODE_BITSIZE (mode1)
1397 && (!SLOW_UNALIGNED_ACCESS (mode1, MEM_ALIGN (op0))
1398 || (bitnum % bitsize == 0
1399 && MEM_ALIGN (op0) % bitsize == 0)))
1400 {
1401 op0 = adjust_bitfield_address (op0, mode1, bitnum / BITS_PER_UNIT);
1402 return convert_extracted_bit_field (op0, mode, tmode, unsignedp);
1403 }
1404
1405 no_subreg_mode_swap:
1406
1407 /* Handle fields bigger than a word. */
1408
1409 if (bitsize > BITS_PER_WORD)
1410 {
1411 /* Here we transfer the words of the field
1412 in the order least significant first.
1413 This is because the most significant word is the one which may
1414 be less than full. */
1415
1416 unsigned int nwords = (bitsize + (BITS_PER_WORD - 1)) / BITS_PER_WORD;
1417 unsigned int i;
1418 rtx last;
1419
1420 if (target == 0 || !REG_P (target) || !valid_multiword_target_p (target))
1421 target = gen_reg_rtx (mode);
1422
1423 /* Indicate for flow that the entire target reg is being set. */
1424 emit_clobber (target);
1425
1426 last = get_last_insn ();
1427 for (i = 0; i < nwords; i++)
1428 {
1429 /* If I is 0, use the low-order word in both field and target;
1430 if I is 1, use the next to lowest word; and so on. */
1431 /* Word number in TARGET to use. */
1432 unsigned int wordnum
1433 = (WORDS_BIG_ENDIAN
1434 ? GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD - i - 1
1435 : i);
1436 /* Offset from start of field in OP0. */
1437 unsigned int bit_offset = (WORDS_BIG_ENDIAN
1438 ? MAX (0, ((int) bitsize - ((int) i + 1)
1439 * (int) BITS_PER_WORD))
1440 : (int) i * BITS_PER_WORD);
1441 rtx target_part = operand_subword (target, wordnum, 1, VOIDmode);
1442 rtx result_part
1443 = extract_bit_field_1 (op0, MIN (BITS_PER_WORD,
1444 bitsize - i * BITS_PER_WORD),
1445 bitnum + bit_offset, 1, false, target_part,
1446 mode, word_mode, fallback_p);
1447
1448 gcc_assert (target_part);
1449 if (!result_part)
1450 {
1451 delete_insns_since (last);
1452 return NULL;
1453 }
1454
1455 if (result_part != target_part)
1456 emit_move_insn (target_part, result_part);
1457 }
1458
1459 if (unsignedp)
1460 {
1461 /* Unless we've filled TARGET, the upper regs in a multi-reg value
1462 need to be zero'd out. */
1463 if (GET_MODE_SIZE (GET_MODE (target)) > nwords * UNITS_PER_WORD)
1464 {
1465 unsigned int i, total_words;
1466
1467 total_words = GET_MODE_SIZE (GET_MODE (target)) / UNITS_PER_WORD;
1468 for (i = nwords; i < total_words; i++)
1469 emit_move_insn
1470 (operand_subword (target,
1471 WORDS_BIG_ENDIAN ? total_words - i - 1 : i,
1472 1, VOIDmode),
1473 const0_rtx);
1474 }
1475 return target;
1476 }
1477
1478 /* Signed bit field: sign-extend with two arithmetic shifts. */
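/* E.g. a signed 48-bit field extracted into DImode is shifted left by
   64 - 48 == 16 and then arithmetically right by 16, replicating the sign
   bit through the upper 16 bits.  */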
1479 target = expand_shift (LSHIFT_EXPR, mode, target,
1480 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1481 return expand_shift (RSHIFT_EXPR, mode, target,
1482 GET_MODE_BITSIZE (mode) - bitsize, NULL_RTX, 0);
1483 }
1484
1485 /* If OP0 is a multi-word register, narrow it to the affected word.
1486 If the region spans two words, defer to extract_split_bit_field. */
1487 if (!MEM_P (op0) && GET_MODE_SIZE (GET_MODE (op0)) > UNITS_PER_WORD)
1488 {
1489 op0 = simplify_gen_subreg (word_mode, op0, GET_MODE (op0),
1490 bitnum / BITS_PER_WORD * UNITS_PER_WORD);
1491 bitnum %= BITS_PER_WORD;
1492 if (bitnum + bitsize > BITS_PER_WORD)
1493 {
1494 if (!fallback_p)
1495 return NULL_RTX;
1496 target = extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1497 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1498 }
1499 }
1500
1501 /* From here on we know the desired field is smaller than a word.
1502 If OP0 is a register, it too fits within a word. */
1503
1504 ext_mode = mode_for_extraction (unsignedp ? EP_extzv : EP_extv, 0);
1505 if (ext_mode != MAX_MACHINE_MODE
1506 && bitsize > 0
1507 && GET_MODE_BITSIZE (ext_mode) >= bitsize
1508 /* Do not use extv/extzv for volatile bitfields when
1509 -fstrict-volatile-bitfields is in effect. */
1510 && !(MEM_P (op0) && MEM_VOLATILE_P (op0)
1511 && flag_strict_volatile_bitfields > 0)
1512 /* If op0 is a register, we need it in EXT_MODE to make it
1513 acceptable to the format of ext(z)v. */
1514 && !(GET_CODE (op0) == SUBREG && GET_MODE (op0) != ext_mode))
1515 {
1516 struct expand_operand ops[4];
1517 unsigned HOST_WIDE_INT bitpos = bitnum;
1518 rtx xop0 = op0;
1519 rtx xtarget = target;
1520 rtx xspec_target = target;
1521 rtx xspec_target_subreg = 0;
1522 unsigned unit = GET_MODE_BITSIZE (ext_mode);
1523
1524 /* If op0 is a register, we need it in EXT_MODE to make it
1525 acceptable to the format of ext(z)v. */
1526 if (REG_P (xop0) && GET_MODE (xop0) != ext_mode)
1527 xop0 = gen_lowpart_SUBREG (ext_mode, xop0);
1528
1529 if (MEM_P (xop0))
1530 {
1531 /* Get a reference to the first byte of the field. */
1532 xop0 = adjust_bitfield_address (xop0, byte_mode,
1533 bitpos / BITS_PER_UNIT);
1534 bitpos %= BITS_PER_UNIT;
1535 }
1536 else
1537 {
1538 /* Convert from counting within OP0 to counting in EXT_MODE. */
1539 if (BYTES_BIG_ENDIAN)
1540 bitpos += unit - GET_MODE_BITSIZE (GET_MODE (op0));
1541 }
1542
1543 /* If BITS_BIG_ENDIAN is zero on a BYTES_BIG_ENDIAN machine, we count
1544 "backwards" from the size of the unit we are extracting from.
1545 Otherwise, we count bits from the most significant on a
1546 BYTES/BITS_BIG_ENDIAN machine. */
1547
1548 if (BITS_BIG_ENDIAN != BYTES_BIG_ENDIAN)
1549 bitpos = unit - bitsize - bitpos;
1550
1551 if (xtarget == 0)
1552 xtarget = xspec_target = gen_reg_rtx (tmode);
1553
1554 if (GET_MODE (xtarget) != ext_mode)
1555 {
1556 /* Don't use LHS paradoxical subreg if explicit truncation is needed
1557 between the mode of the extraction (word_mode) and the target
1558 mode. Instead, create a temporary and use convert_move to set
1559 the target. */
1560 if (REG_P (xtarget)
1561 && TRULY_NOOP_TRUNCATION_MODES_P (GET_MODE (xtarget), ext_mode))
1562 {
1563 xtarget = gen_lowpart (ext_mode, xtarget);
1564 if (GET_MODE_PRECISION (ext_mode)
1565 > GET_MODE_PRECISION (GET_MODE (xspec_target)))
1566 xspec_target_subreg = xtarget;
1567 }
1568 else
1569 xtarget = gen_reg_rtx (ext_mode);
1570 }
1571
1572 create_output_operand (&ops[0], xtarget, ext_mode);
1573 create_fixed_operand (&ops[1], xop0);
1574 create_integer_operand (&ops[2], bitsize);
1575 create_integer_operand (&ops[3], bitpos);
1576 if (maybe_expand_insn (unsignedp ? CODE_FOR_extzv : CODE_FOR_extv,
1577 4, ops))
1578 {
1579 xtarget = ops[0].value;
1580 if (xtarget == xspec_target)
1581 return xtarget;
1582 if (xtarget == xspec_target_subreg)
1583 return xspec_target;
1584 return convert_extracted_bit_field (xtarget, mode, tmode, unsignedp);
1585 }
1586 }
1587
1588 /* If OP0 is a memory, try copying it to a register and seeing if a
1589 cheap register alternative is available. */
1590 if (ext_mode != MAX_MACHINE_MODE && MEM_P (op0))
1591 {
1592 enum machine_mode bestmode;
1593
1594 /* Get the mode to use for extracting from this field. If
1595 OP0 is BLKmode, get the smallest mode consistent with the
1596 alignment. If OP0 is a non-BLKmode object that is no
1597 wider than EXT_MODE, use its mode. Otherwise, use the
1598 smallest mode containing the field. */
1599
1600 if (GET_MODE (op0) == BLKmode
1601 || (ext_mode != MAX_MACHINE_MODE
1602 && GET_MODE_SIZE (GET_MODE (op0)) > GET_MODE_SIZE (ext_mode)))
1603 bestmode = get_best_mode (bitsize, bitnum, 0, 0, MEM_ALIGN (op0),
1604 (ext_mode == MAX_MACHINE_MODE
1605 ? VOIDmode : ext_mode),
1606 MEM_VOLATILE_P (op0));
1607 else
1608 bestmode = GET_MODE (op0);
1609
1610 if (bestmode != VOIDmode
1611 && !(SLOW_UNALIGNED_ACCESS (bestmode, MEM_ALIGN (op0))
1612 && GET_MODE_BITSIZE (bestmode) > MEM_ALIGN (op0)))
1613 {
1614 unsigned HOST_WIDE_INT offset, bitpos;
1615
1616 /* Compute the offset as a multiple of this unit,
1617 counting in bytes. */
1618 unsigned int unit = GET_MODE_BITSIZE (bestmode);
1619 offset = (bitnum / unit) * GET_MODE_SIZE (bestmode);
1620 bitpos = bitnum % unit;
1621
1622 /* Make sure the register is big enough for the whole field. */
1623 if (bitpos + bitsize <= unit)
1624 {
1625 rtx last, result, xop0;
1626
1627 last = get_last_insn ();
1628
1629 /* Fetch it to a register in that size. */
1630 xop0 = adjust_bitfield_address (op0, bestmode, offset);
1631 xop0 = force_reg (bestmode, xop0);
1632 result = extract_bit_field_1 (xop0, bitsize, bitpos,
1633 unsignedp, packedp, target,
1634 mode, tmode, false);
1635 if (result)
1636 return result;
1637
1638 delete_insns_since (last);
1639 }
1640 }
1641 }
1642
1643 if (!fallback_p)
1644 return NULL;
1645
1646 /* Find a correspondingly-sized integer field, so we can apply
1647 shifts and masks to it. */
1648 int_mode = int_mode_for_mode (tmode);
1649 if (int_mode == BLKmode)
1650 int_mode = int_mode_for_mode (mode);
1651 /* Should probably push op0 out to memory and then do a load. */
1652 gcc_assert (int_mode != BLKmode);
1653
1654 target = extract_fixed_bit_field (int_mode, op0, bitsize, bitnum,
1655 target, unsignedp, packedp);
1656 return convert_extracted_bit_field (target, mode, tmode, unsignedp);
1657 }
1658
1659 /* Generate code to extract a byte-field from STR_RTX
1660 containing BITSIZE bits, starting at BITNUM,
1661 and put it in TARGET if possible (if TARGET is nonzero).
1662 Regardless of TARGET, we return the rtx for where the value is placed.
1663
1664 STR_RTX is the structure containing the byte (a REG or MEM).
1665 UNSIGNEDP is nonzero if this is an unsigned bit field.
1666 PACKEDP is nonzero if the field has the packed attribute.
1667 MODE is the natural mode of the field value once extracted.
1668 TMODE is the mode the caller would like the value to have;
1669 but the value may be returned with type MODE instead.
1670
1671 If a TARGET is specified and we can store in it at no extra cost,
1672 we do so, and return TARGET.
1673 Otherwise, we return a REG of mode TMODE or MODE, with TMODE preferred
1674 if they are equally easy. */
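/* Illustrative sketch (not part of the original sources): a caller that
   wants the unsigned 5-bit field starting at bit 11 of some rtx OP0,
   returned as an SImode value, might write

       rtx val = extract_bit_field (op0, 5, 11, 1, false,
                                    NULL_RTX, SImode, SImode);

   where OP0 and VAL are hypothetical names; the field comes back in a
   register, or in the suggested TARGET when storing there is free.  */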
1675
1676 rtx
1677 extract_bit_field (rtx str_rtx, unsigned HOST_WIDE_INT bitsize,
1678 unsigned HOST_WIDE_INT bitnum, int unsignedp, bool packedp,
1679 rtx target, enum machine_mode mode, enum machine_mode tmode)
1680 {
1681 return extract_bit_field_1 (str_rtx, bitsize, bitnum, unsignedp, packedp,
1682 target, mode, tmode, true);
1683 }
1684 \f
1685 /* Use shifts and boolean operations to extract a field of BITSIZE bits
1686 from bit BITNUM of OP0.
1687
1688 UNSIGNEDP is nonzero for an unsigned bit field (don't sign-extend value).
1689 PACKEDP is true if the field has the packed attribute.
1690
1691 If TARGET is nonzero, attempts to store the value there
1692 and return TARGET, but this is not guaranteed.
1693 If TARGET is not used, create a pseudo-reg of mode TMODE for the value. */
1694
1695 static rtx
1696 extract_fixed_bit_field (enum machine_mode tmode, rtx op0,
1697 unsigned HOST_WIDE_INT bitsize,
1698 unsigned HOST_WIDE_INT bitnum, rtx target,
1699 int unsignedp, bool packedp)
1700 {
1701 enum machine_mode mode;
1702
1703 if (MEM_P (op0))
1704 {
1705 /* Get the proper mode to use for this field. We want a mode that
1706 includes the entire field. If such a mode would be larger than
1707 a word, we won't be doing the extraction the normal way. */
1708
1709 if (MEM_VOLATILE_P (op0)
1710 && flag_strict_volatile_bitfields > 0)
1711 {
1712 if (GET_MODE_BITSIZE (GET_MODE (op0)) > 0)
1713 mode = GET_MODE (op0);
1714 else if (target && GET_MODE_BITSIZE (GET_MODE (target)) > 0)
1715 mode = GET_MODE (target);
1716 else
1717 mode = tmode;
1718 }
1719 else
1720 mode = get_best_mode (bitsize, bitnum, 0, 0,
1721 MEM_ALIGN (op0), word_mode, MEM_VOLATILE_P (op0));
1722
1723 if (mode == VOIDmode)
1724 /* The only way this should occur is if the field spans word
1725 boundaries. */
1726 return extract_split_bit_field (op0, bitsize, bitnum, unsignedp);
1727
1728 unsigned int total_bits = GET_MODE_BITSIZE (mode);
1729 HOST_WIDE_INT bit_offset = bitnum - bitnum % total_bits;
1730
1731 /* If we're accessing a volatile MEM, we can't apply BIT_OFFSET
1732 if it results in a multi-word access where we otherwise wouldn't
1733 have one. So, check for that case here. */
1734 if (MEM_P (op0)
1735 && MEM_VOLATILE_P (op0)
1736 && flag_strict_volatile_bitfields > 0
1737 && bitnum % BITS_PER_UNIT + bitsize <= total_bits
1738 && bitnum % GET_MODE_BITSIZE (mode) + bitsize > total_bits)
1739 {
1740 if (STRICT_ALIGNMENT)
1741 {
1742 static bool informed_about_misalignment = false;
1743
1744 if (packedp)
1745 {
1746 if (bitsize == total_bits)
1747 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1748 "multiple accesses to volatile structure"
1749 " member because of packed attribute");
1750 else
1751 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1752 "multiple accesses to volatile structure"
1753 " bitfield because of packed attribute");
1754
1755 return extract_split_bit_field (op0, bitsize, bitnum,
1756 unsignedp);
1757 }
1758
1759 if (bitsize == total_bits)
1760 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1761 "mis-aligned access used for structure member");
1762 else
1763 warning_at (input_location, OPT_fstrict_volatile_bitfields,
1764 "mis-aligned access used for structure bitfield");
1765
1766 if (! informed_about_misalignment)
1767 {
1768 informed_about_misalignment = true;
1769 inform (input_location,
1770 "when a volatile object spans multiple type-sized"
1771 " locations, the compiler must choose between using"
1772 " a single mis-aligned access to preserve the"
1773 " volatility, or using multiple aligned accesses"
1774 " to avoid runtime faults; this code may fail at"
1775 " runtime if the hardware does not allow this"
1776 " access");
1777 }
1778 }
1779 bit_offset = bitnum - bitnum % BITS_PER_UNIT;
1780 }
1781 op0 = adjust_bitfield_address (op0, mode, bit_offset / BITS_PER_UNIT);
1782 bitnum -= bit_offset;
1783 }
1784
1785 mode = GET_MODE (op0);
1786 gcc_assert (SCALAR_INT_MODE_P (mode));
1787
1788 /* Note that bitsize + bitnum can be greater than GET_MODE_BITSIZE (mode)
1789 for invalid input, such as extract equivalent of f5 from
1790 gcc.dg/pr48335-2.c. */
1791
1792 if (BYTES_BIG_ENDIAN)
1793 /* BITNUM is the distance between our msb and that of OP0.
1794 Convert it to the distance from the lsb. */
1795 bitnum = GET_MODE_BITSIZE (mode) - bitsize - bitnum;
1796
1797 /* Now BITNUM is always the distance between the field's lsb and that of OP0.
1798 We have reduced the big-endian case to the little-endian case. */
1799
1800 if (unsignedp)
1801 {
1802 if (bitnum)
1803 {
1804 /* If the field does not already start at the lsb,
1805 shift it so it does. */
1806 /* Maybe propagate the target for the shift. */
1807 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1808 if (tmode != mode)
1809 subtarget = 0;
1810 op0 = expand_shift (RSHIFT_EXPR, mode, op0, bitnum, subtarget, 1);
1811 }
1812 /* Convert the value to the desired mode. */
1813 if (mode != tmode)
1814 op0 = convert_to_mode (tmode, op0, 1);
1815
1816 /* Unless the msb of the field used to be the msb when we shifted,
1817 mask out the upper bits. */
1818
1819 if (GET_MODE_BITSIZE (mode) != bitnum + bitsize)
1820 return expand_binop (GET_MODE (op0), and_optab, op0,
1821 mask_rtx (GET_MODE (op0), 0, bitsize, 0),
1822 target, 1, OPTAB_LIB_WIDEN);
1823 return op0;
1824 }
1825
1826 /* To extract a signed bit-field, first shift its msb to the msb of the word,
1827 then arithmetic-shift its lsb to the lsb of the word. */
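  /* Worked example (illustrative): in an 8-bit mode, a signed 3-bit field
     whose lsb is bit 2 is obtained as (op0 << 3) >> 5, where the right
     shift is arithmetic: the left shift moves the field's msb to bit 7,
     and the arithmetic right shift both repositions the field at bit 0
     and sign-extends it.  The code below computes the two shift amounts
     as GET_MODE_BITSIZE (mode) - (bitsize + bitnum) and
     GET_MODE_BITSIZE (mode) - bitsize respectively.  */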
1828 op0 = force_reg (mode, op0);
1829
1830 /* Find the narrowest integer mode that contains the field. */
1831
1832 for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT); mode != VOIDmode;
1833 mode = GET_MODE_WIDER_MODE (mode))
1834 if (GET_MODE_BITSIZE (mode) >= bitsize + bitnum)
1835 {
1836 op0 = convert_to_mode (mode, op0, 0);
1837 break;
1838 }
1839
1840 if (mode != tmode)
1841 target = 0;
1842
1843 if (GET_MODE_BITSIZE (mode) != (bitsize + bitnum))
1844 {
1845 int amount = GET_MODE_BITSIZE (mode) - (bitsize + bitnum);
1846 /* Maybe propagate the target for the shift. */
1847 rtx subtarget = (target != 0 && REG_P (target) ? target : 0);
1848 op0 = expand_shift (LSHIFT_EXPR, mode, op0, amount, subtarget, 1);
1849 }
1850
1851 return expand_shift (RSHIFT_EXPR, mode, op0,
1852 GET_MODE_BITSIZE (mode) - bitsize, target, 0);
1853 }
1854 \f
1855 /* Return a constant integer (CONST_INT or CONST_DOUBLE) mask value
1856 of mode MODE with BITSIZE ones followed by BITPOS zeros, or the
1857 complement of that if COMPLEMENT. The mask is truncated if
1858 necessary to the width of mode MODE. The mask is zero-extended if
1859 BITSIZE+BITPOS is too small for MODE. */
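/* For instance (illustrative values): with MODE = QImode, BITPOS = 3 and
   BITSIZE = 4 the mask is 0b01111000 (0x78); with COMPLEMENT nonzero it is
   0b10000111 (0x87) after truncation to QImode.  */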
1860
1861 static rtx
1862 mask_rtx (enum machine_mode mode, int bitpos, int bitsize, int complement)
1863 {
1864 double_int mask;
1865
1866 mask = double_int::mask (bitsize);
1867 mask = mask.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1868
1869 if (complement)
1870 mask = ~mask;
1871
1872 return immed_double_int_const (mask, mode);
1873 }
1874
1875 /* Return a constant integer (CONST_INT or CONST_DOUBLE) rtx with the value
1876 VALUE truncated to BITSIZE bits and then shifted left BITPOS bits. */
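/* E.g. (illustrative): for VALUE = 0x2b, BITSIZE = 4 and BITPOS = 3 the
   result is (0x2b & 0xf) << 3 == 0x58.  */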
1877
1878 static rtx
1879 lshift_value (enum machine_mode mode, rtx value, int bitpos, int bitsize)
1880 {
1881 double_int val;
1882
1883 val = double_int::from_uhwi (INTVAL (value)).zext (bitsize);
1884 val = val.llshift (bitpos, HOST_BITS_PER_DOUBLE_INT);
1885
1886 return immed_double_int_const (val, mode);
1887 }
1888 \f
1889 /* Extract a bit field that is split across two words
1890 and return an RTX for the result.
1891
1892 OP0 is the REG, SUBREG or MEM rtx for the first of the two words.
1893 BITSIZE is the field width; BITPOS, position of its first bit, in the word.
1894 UNSIGNEDP is 1 if should zero-extend the contents; else sign-extend. */
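/* Worked example (illustrative, assuming 32-bit words and little-endian
   bit numbering): for BITSIZE = 20 and BITPOS = 26 the loop below first
   extracts the 6 bits at positions 26..31 of word 0, then the remaining
   14 bits at positions 0..13 of word 1; the second part is shifted left
   by 6 and the two parts are combined with IOR.  For a signed field the
   result is then sign-extended by the pair of shifts at the end.  */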
1895
1896 static rtx
1897 extract_split_bit_field (rtx op0, unsigned HOST_WIDE_INT bitsize,
1898 unsigned HOST_WIDE_INT bitpos, int unsignedp)
1899 {
1900 unsigned int unit;
1901 unsigned int bitsdone = 0;
1902 rtx result = NULL_RTX;
1903 int first = 1;
1904
1905 /* Make sure UNIT isn't larger than BITS_PER_WORD; we can only handle that
1906 much at a time. */
1907 if (REG_P (op0) || GET_CODE (op0) == SUBREG)
1908 unit = BITS_PER_WORD;
1909 else
1910 unit = MIN (MEM_ALIGN (op0), BITS_PER_WORD);
1911
1912 while (bitsdone < bitsize)
1913 {
1914 unsigned HOST_WIDE_INT thissize;
1915 rtx part, word;
1916 unsigned HOST_WIDE_INT thispos;
1917 unsigned HOST_WIDE_INT offset;
1918
1919 offset = (bitpos + bitsdone) / unit;
1920 thispos = (bitpos + bitsdone) % unit;
1921
1922 /* THISSIZE must not overrun a word boundary. Otherwise,
1923 extract_fixed_bit_field will call us again, and we will mutually
1924 recurse forever. */
1925 thissize = MIN (bitsize - bitsdone, BITS_PER_WORD);
1926 thissize = MIN (thissize, unit - thispos);
1927
1928 /* If OP0 is a register, then handle OFFSET here.
1929
1930 When handling multiword bitfields, extract_bit_field may pass
1931 down a word_mode SUBREG of a larger REG for a bitfield that actually
1932 crosses a word boundary. Thus, for a SUBREG, we must find
1933 the current word starting from the base register. */
1934 if (GET_CODE (op0) == SUBREG)
1935 {
1936 int word_offset = (SUBREG_BYTE (op0) / UNITS_PER_WORD) + offset;
1937 word = operand_subword_force (SUBREG_REG (op0), word_offset,
1938 GET_MODE (SUBREG_REG (op0)));
1939 offset = 0;
1940 }
1941 else if (REG_P (op0))
1942 {
1943 word = operand_subword_force (op0, offset, GET_MODE (op0));
1944 offset = 0;
1945 }
1946 else
1947 word = op0;
1948
1949 /* Extract the parts in bit-counting order,
1950 whose meaning is determined by BYTES_BIG_ENDIAN.
1951 OFFSET is in UNITs, and UNIT is in bits. */
1952 part = extract_fixed_bit_field (word_mode, word, thissize,
1953 offset * unit + thispos, 0, 1, false);
1954 bitsdone += thissize;
1955
1956 /* Shift this part into place for the result. */
1957 if (BYTES_BIG_ENDIAN)
1958 {
1959 if (bitsize != bitsdone)
1960 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1961 bitsize - bitsdone, 0, 1);
1962 }
1963 else
1964 {
1965 if (bitsdone != thissize)
1966 part = expand_shift (LSHIFT_EXPR, word_mode, part,
1967 bitsdone - thissize, 0, 1);
1968 }
1969
1970 if (first)
1971 result = part;
1972 else
1973 /* Combine the parts with bitwise or. This works
1974 because we extracted each part as an unsigned bit field. */
1975 result = expand_binop (word_mode, ior_optab, part, result, NULL_RTX, 1,
1976 OPTAB_LIB_WIDEN);
1977
1978 first = 0;
1979 }
1980
1981 /* Unsigned bit field: we are done. */
1982 if (unsignedp)
1983 return result;
1984 /* Signed bit field: sign-extend with two arithmetic shifts. */
1985 result = expand_shift (LSHIFT_EXPR, word_mode, result,
1986 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1987 return expand_shift (RSHIFT_EXPR, word_mode, result,
1988 BITS_PER_WORD - bitsize, NULL_RTX, 0);
1989 }
1990 \f
1991 /* Try to read the low bits of SRC as an rvalue of mode MODE, preserving
1992 the bit pattern. SRC_MODE is the mode of SRC; if this is smaller than
1993 MODE, fill the upper bits with zeros. Fail if the layout of either
1994 mode is unknown (as for CC modes) or if the extraction would involve
1995 unprofitable mode punning. Return the value on success, otherwise
1996 return null.
1997
1998 This is different from gen_lowpart* in these respects:
1999
2000 - the returned value must always be considered an rvalue
2001
2002 - when MODE is wider than SRC_MODE, the extraction involves
2003 a zero extension
2004
2005 - when MODE is smaller than SRC_MODE, the extraction involves
2006 a truncation (and is thus subject to TRULY_NOOP_TRUNCATION).
2007
2008 In other words, this routine performs a computation, whereas the
2009 gen_lowpart* routines are conceptually lvalue or rvalue subreg
2010 operations. */
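/* For instance (illustrative): extract_low_bits (SImode, DImode, x) yields
   the low 32 bits of X as an SImode rvalue (a truncation), whereas
   extract_low_bits (DImode, SImode, x) yields X zero-extended to DImode;
   either call may return null when the target makes the mode punning
   unprofitable.  */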
2011
2012 rtx
2013 extract_low_bits (enum machine_mode mode, enum machine_mode src_mode, rtx src)
2014 {
2015 enum machine_mode int_mode, src_int_mode;
2016
2017 if (mode == src_mode)
2018 return src;
2019
2020 if (CONSTANT_P (src))
2021 {
2022 /* simplify_gen_subreg can't be used here, as if simplify_subreg
2023 fails, it will happily create (subreg (symbol_ref)) or similar
2024 invalid SUBREGs. */
2025 unsigned int byte = subreg_lowpart_offset (mode, src_mode);
2026 rtx ret = simplify_subreg (mode, src, src_mode, byte);
2027 if (ret)
2028 return ret;
2029
2030 if (GET_MODE (src) == VOIDmode
2031 || !validate_subreg (mode, src_mode, src, byte))
2032 return NULL_RTX;
2033
2034 src = force_reg (GET_MODE (src), src);
2035 return gen_rtx_SUBREG (mode, src, byte);
2036 }
2037
2038 if (GET_MODE_CLASS (mode) == MODE_CC || GET_MODE_CLASS (src_mode) == MODE_CC)
2039 return NULL_RTX;
2040
2041 if (GET_MODE_BITSIZE (mode) == GET_MODE_BITSIZE (src_mode)
2042 && MODES_TIEABLE_P (mode, src_mode))
2043 {
2044 rtx x = gen_lowpart_common (mode, src);
2045 if (x)
2046 return x;
2047 }
2048
2049 src_int_mode = int_mode_for_mode (src_mode);
2050 int_mode = int_mode_for_mode (mode);
2051 if (src_int_mode == BLKmode || int_mode == BLKmode)
2052 return NULL_RTX;
2053
2054 if (!MODES_TIEABLE_P (src_int_mode, src_mode))
2055 return NULL_RTX;
2056 if (!MODES_TIEABLE_P (int_mode, mode))
2057 return NULL_RTX;
2058
2059 src = gen_lowpart (src_int_mode, src);
2060 src = convert_modes (int_mode, src_int_mode, src, true);
2061 src = gen_lowpart (mode, src);
2062 return src;
2063 }
2064 \f
2065 /* Add INC into TARGET. */
2066
2067 void
2068 expand_inc (rtx target, rtx inc)
2069 {
2070 rtx value = expand_binop (GET_MODE (target), add_optab,
2071 target, inc,
2072 target, 0, OPTAB_LIB_WIDEN);
2073 if (value != target)
2074 emit_move_insn (target, value);
2075 }
2076
2077 /* Subtract DEC from TARGET. */
2078
2079 void
2080 expand_dec (rtx target, rtx dec)
2081 {
2082 rtx value = expand_binop (GET_MODE (target), sub_optab,
2083 target, dec,
2084 target, 0, OPTAB_LIB_WIDEN);
2085 if (value != target)
2086 emit_move_insn (target, value);
2087 }
2088 \f
2089 /* Output a shift instruction for expression code CODE,
2090 with SHIFTED being the rtx for the value to shift,
2091 and AMOUNT the rtx for the amount to shift by.
2092 Store the result in the rtx TARGET, if that is convenient.
2093 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2094 Return the rtx for where the value is. */
2095
2096 static rtx
2097 expand_shift_1 (enum tree_code code, enum machine_mode mode, rtx shifted,
2098 rtx amount, rtx target, int unsignedp)
2099 {
2100 rtx op1, temp = 0;
2101 int left = (code == LSHIFT_EXPR || code == LROTATE_EXPR);
2102 int rotate = (code == LROTATE_EXPR || code == RROTATE_EXPR);
2103 optab lshift_optab = ashl_optab;
2104 optab rshift_arith_optab = ashr_optab;
2105 optab rshift_uns_optab = lshr_optab;
2106 optab lrotate_optab = rotl_optab;
2107 optab rrotate_optab = rotr_optab;
2108 enum machine_mode op1_mode;
2109 int attempt;
2110 bool speed = optimize_insn_for_speed_p ();
2111
2112 op1 = amount;
2113 op1_mode = GET_MODE (op1);
2114
2115 /* Determine whether the shift/rotate amount is a vector or a scalar. If the
2116 shift amount is a vector, use the vector/vector shift patterns. */
2117 if (VECTOR_MODE_P (mode) && VECTOR_MODE_P (op1_mode))
2118 {
2119 lshift_optab = vashl_optab;
2120 rshift_arith_optab = vashr_optab;
2121 rshift_uns_optab = vlshr_optab;
2122 lrotate_optab = vrotl_optab;
2123 rrotate_optab = vrotr_optab;
2124 }
2125
2126 /* We used to detect shift counts computed by NEGATE_EXPR
2127 and shift in the opposite direction, but that does not work
2128 on all machines. */
2129
2130 if (SHIFT_COUNT_TRUNCATED)
2131 {
2132 if (CONST_INT_P (op1)
2133 && ((unsigned HOST_WIDE_INT) INTVAL (op1) >=
2134 (unsigned HOST_WIDE_INT) GET_MODE_BITSIZE (mode)))
2135 op1 = GEN_INT ((unsigned HOST_WIDE_INT) INTVAL (op1)
2136 % GET_MODE_BITSIZE (mode));
2137 else if (GET_CODE (op1) == SUBREG
2138 && subreg_lowpart_p (op1)
2139 && INTEGRAL_MODE_P (GET_MODE (SUBREG_REG (op1))))
2140 op1 = SUBREG_REG (op1);
2141 }
2142
2143 if (op1 == const0_rtx)
2144 return shifted;
2145
2146 /* Check whether it's cheaper to implement a left shift by a constant
2147 bit count by a sequence of additions. */
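  /* E.g. x << 2 may be emitted as two self-additions, t = x + x;
     t = t + t, when 2 * add_cost is cheaper than shift_cost for a
     shift count of 2.  */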
2148 if (code == LSHIFT_EXPR
2149 && CONST_INT_P (op1)
2150 && INTVAL (op1) > 0
2151 && INTVAL (op1) < GET_MODE_PRECISION (mode)
2152 && INTVAL (op1) < MAX_BITS_PER_WORD
2153 && (shift_cost (speed, mode, INTVAL (op1))
2154 > INTVAL (op1) * add_cost (speed, mode))
2155 && shift_cost (speed, mode, INTVAL (op1)) != MAX_COST)
2156 {
2157 int i;
2158 for (i = 0; i < INTVAL (op1); i++)
2159 {
2160 temp = force_reg (mode, shifted);
2161 shifted = expand_binop (mode, add_optab, temp, temp, NULL_RTX,
2162 unsignedp, OPTAB_LIB_WIDEN);
2163 }
2164 return shifted;
2165 }
2166
2167 for (attempt = 0; temp == 0 && attempt < 3; attempt++)
2168 {
2169 enum optab_methods methods;
2170
2171 if (attempt == 0)
2172 methods = OPTAB_DIRECT;
2173 else if (attempt == 1)
2174 methods = OPTAB_WIDEN;
2175 else
2176 methods = OPTAB_LIB_WIDEN;
2177
2178 if (rotate)
2179 {
2180 /* Widening does not work for rotation. */
2181 if (methods == OPTAB_WIDEN)
2182 continue;
2183 else if (methods == OPTAB_LIB_WIDEN)
2184 {
2185 /* If we have been unable to open-code this by a rotation,
2186 do it as the IOR of two shifts. I.e., to rotate A
2187 by N bits, compute (A << N) | ((unsigned) A >> (C - N))
2188 where C is the bitsize of A.
2189
2190 It is theoretically possible that the target machine might
2191 not be able to perform either shift and hence we would
2192 be making two libcalls rather than just the one for the
2193 shift (similarly if IOR could not be done). We will allow
2194 this extremely unlikely lossage to avoid complicating the
2195 code below. */
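	      /* Concretely (illustrative): in an 8-bit mode, rotating
		 0xb4 left by 3 yields ((0xb4 << 3) | (0xb4 >> 5)) & 0xff
		 == 0xa5.  */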
2196
2197 rtx subtarget = target == shifted ? 0 : target;
2198 rtx new_amount, other_amount;
2199 rtx temp1;
2200
2201 new_amount = op1;
2202 if (CONST_INT_P (op1))
2203 other_amount = GEN_INT (GET_MODE_BITSIZE (mode)
2204 - INTVAL (op1));
2205 else
2206 other_amount
2207 = simplify_gen_binary (MINUS, GET_MODE (op1),
2208 GEN_INT (GET_MODE_PRECISION (mode)),
2209 op1);
2210
2211 shifted = force_reg (mode, shifted);
2212
2213 temp = expand_shift_1 (left ? LSHIFT_EXPR : RSHIFT_EXPR,
2214 mode, shifted, new_amount, 0, 1);
2215 temp1 = expand_shift_1 (left ? RSHIFT_EXPR : LSHIFT_EXPR,
2216 mode, shifted, other_amount,
2217 subtarget, 1);
2218 return expand_binop (mode, ior_optab, temp, temp1, target,
2219 unsignedp, methods);
2220 }
2221
2222 temp = expand_binop (mode,
2223 left ? lrotate_optab : rrotate_optab,
2224 shifted, op1, target, unsignedp, methods);
2225 }
2226 else if (unsignedp)
2227 temp = expand_binop (mode,
2228 left ? lshift_optab : rshift_uns_optab,
2229 shifted, op1, target, unsignedp, methods);
2230
2231 /* Do arithmetic shifts.
2232 Also, if we are going to widen the operand, we can just as well
2233 use an arithmetic right-shift instead of a logical one. */
2234 if (temp == 0 && ! rotate
2235 && (! unsignedp || (! left && methods == OPTAB_WIDEN)))
2236 {
2237 enum optab_methods methods1 = methods;
2238
2239 /* If trying to widen a log shift to an arithmetic shift,
2240 don't accept an arithmetic shift of the same size. */
2241 if (unsignedp)
2242 methods1 = OPTAB_MUST_WIDEN;
2243
2244 /* Arithmetic shift */
2245
2246 temp = expand_binop (mode,
2247 left ? lshift_optab : rshift_arith_optab,
2248 shifted, op1, target, unsignedp, methods1);
2249 }
2250
2251 /* We used to try extzv here for logical right shifts, but that was
2252 only useful for one machine, the VAX, and caused poor code
2253 generation there for lshrdi3, so the code was deleted and a
2254 define_expand for lshrsi3 was added to vax.md. */
2255 }
2256
2257 gcc_assert (temp);
2258 return temp;
2259 }
2260
2261 /* Output a shift instruction for expression code CODE,
2262 with SHIFTED being the rtx for the value to shift,
2263 and AMOUNT the amount to shift by.
2264 Store the result in the rtx TARGET, if that is convenient.
2265 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2266 Return the rtx for where the value is. */
2267
2268 rtx
2269 expand_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2270 int amount, rtx target, int unsignedp)
2271 {
2272 return expand_shift_1 (code, mode,
2273 shifted, GEN_INT (amount), target, unsignedp);
2274 }
2275
2276 /* Output a shift instruction for expression code CODE,
2277 with SHIFTED being the rtx for the value to shift,
2278 and AMOUNT the tree for the amount to shift by.
2279 Store the result in the rtx TARGET, if that is convenient.
2280 If UNSIGNEDP is nonzero, do a logical shift; otherwise, arithmetic.
2281 Return the rtx for where the value is. */
2282
2283 rtx
2284 expand_variable_shift (enum tree_code code, enum machine_mode mode, rtx shifted,
2285 tree amount, rtx target, int unsignedp)
2286 {
2287 return expand_shift_1 (code, mode,
2288 shifted, expand_normal (amount), target, unsignedp);
2289 }
2290
2291 \f
2292 /* Indicates the type of fixup needed after a constant multiplication.
2293 BASIC_VARIANT means no fixup is needed, NEGATE_VARIANT means that
2294 the result should be negated, and ADD_VARIANT means that the
2295 multiplicand should be added to the result. */
2296 enum mult_variant {basic_variant, negate_variant, add_variant};
2297
2298 static void synth_mult (struct algorithm *, unsigned HOST_WIDE_INT,
2299 const struct mult_cost *, enum machine_mode mode);
2300 static bool choose_mult_variant (enum machine_mode, HOST_WIDE_INT,
2301 struct algorithm *, enum mult_variant *, int);
2302 static rtx expand_mult_const (enum machine_mode, rtx, HOST_WIDE_INT, rtx,
2303 const struct algorithm *, enum mult_variant);
2304 static unsigned HOST_WIDE_INT invert_mod2n (unsigned HOST_WIDE_INT, int);
2305 static rtx extract_high_half (enum machine_mode, rtx);
2306 static rtx expmed_mult_highpart (enum machine_mode, rtx, rtx, rtx, int, int);
2307 static rtx expmed_mult_highpart_optab (enum machine_mode, rtx, rtx, rtx,
2308 int, int);
2309 /* Compute and return the best algorithm for multiplying by T.
2310 The algorithm must cost less than COST_LIMIT.
2311 If retval.cost >= COST_LIMIT, no algorithm was found and all
2312 other fields of the returned struct are undefined.
2313 MODE is the machine mode of the multiplication. */
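/* For instance (illustrative decomposition): multiplication by 45 can be
   done as t = (x << 2) + x; result = (t << 3) + t, i.e. two shift-and-add
   steps, since 45 == 5 * 9 == (4 + 1) * (8 + 1).  synth_mult searches for
   such decompositions and keeps the cheapest one found within COST_LIMIT.  */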
2314
2315 static void
2316 synth_mult (struct algorithm *alg_out, unsigned HOST_WIDE_INT t,
2317 const struct mult_cost *cost_limit, enum machine_mode mode)
2318 {
2319 int m;
2320 struct algorithm *alg_in, *best_alg;
2321 struct mult_cost best_cost;
2322 struct mult_cost new_limit;
2323 int op_cost, op_latency;
2324 unsigned HOST_WIDE_INT orig_t = t;
2325 unsigned HOST_WIDE_INT q;
2326 int maxm, hash_index;
2327 bool cache_hit = false;
2328 enum alg_code cache_alg = alg_zero;
2329 bool speed = optimize_insn_for_speed_p ();
2330 enum machine_mode imode;
2331 struct alg_hash_entry *entry_ptr;
2332
2333 /* Indicate that no algorithm is yet found. If no algorithm
2334 is found, this value will be returned and indicate failure. */
2335 alg_out->cost.cost = cost_limit->cost + 1;
2336 alg_out->cost.latency = cost_limit->latency + 1;
2337
2338 if (cost_limit->cost < 0
2339 || (cost_limit->cost == 0 && cost_limit->latency <= 0))
2340 return;
2341
2342 /* Be prepared for vector modes. */
2343 imode = GET_MODE_INNER (mode);
2344 if (imode == VOIDmode)
2345 imode = mode;
2346
2347 maxm = MIN (BITS_PER_WORD, GET_MODE_BITSIZE (imode));
2348
2349 /* Restrict the bits of "t" to the multiplication's mode. */
2350 t &= GET_MODE_MASK (imode);
2351
2352 /* t == 1 can be done in zero cost. */
2353 if (t == 1)
2354 {
2355 alg_out->ops = 1;
2356 alg_out->cost.cost = 0;
2357 alg_out->cost.latency = 0;
2358 alg_out->op[0] = alg_m;
2359 return;
2360 }
2361
2362 /* t == 0 sometimes has a cost. If it does and it exceeds our limit,
2363 fail now. */
2364 if (t == 0)
2365 {
2366 if (MULT_COST_LESS (cost_limit, zero_cost (speed)))
2367 return;
2368 else
2369 {
2370 alg_out->ops = 1;
2371 alg_out->cost.cost = zero_cost (speed);
2372 alg_out->cost.latency = zero_cost (speed);
2373 alg_out->op[0] = alg_zero;
2374 return;
2375 }
2376 }
2377
2378 /* We'll be needing a couple extra algorithm structures now. */
2379
2380 alg_in = XALLOCA (struct algorithm);
2381 best_alg = XALLOCA (struct algorithm);
2382 best_cost = *cost_limit;
2383
2384 /* Compute the hash index. */
2385 hash_index = (t ^ (unsigned int) mode ^ (speed * 256)) % NUM_ALG_HASH_ENTRIES;
2386
2387 /* See if we already know what to do for T. */
2388 entry_ptr = alg_hash_entry_ptr (hash_index);
2389 if (entry_ptr->t == t
2390 && entry_ptr->mode == mode
2392 && entry_ptr->speed == speed
2393 && entry_ptr->alg != alg_unknown)
2394 {
2395 cache_alg = entry_ptr->alg;
2396
2397 if (cache_alg == alg_impossible)
2398 {
2399 /* The cache tells us that it's impossible to synthesize
2400 multiplication by T within entry_ptr->cost. */
2401 if (!CHEAPER_MULT_COST (&entry_ptr->cost, cost_limit))
2402 /* COST_LIMIT is at least as restrictive as the one
2403 recorded in the hash table, in which case we have no
2404 hope of synthesizing a multiplication. Just
2405 return. */
2406 return;
2407
2408 /* If we get here, COST_LIMIT is less restrictive than the
2409 one recorded in the hash table, so we may be able to
2410 synthesize a multiplication. Proceed as if we didn't
2411 have the cache entry. */
2412 }
2413 else
2414 {
2415 if (CHEAPER_MULT_COST (cost_limit, &entry_ptr->cost))
2416 /* The cached algorithm shows that this multiplication
2417 requires more cost than COST_LIMIT. Just return. This
2418 way, we don't clobber this cache entry with
2419 alg_impossible but retain useful information. */
2420 return;
2421
2422 cache_hit = true;
2423
2424 switch (cache_alg)
2425 {
2426 case alg_shift:
2427 goto do_alg_shift;
2428
2429 case alg_add_t_m2:
2430 case alg_sub_t_m2:
2431 goto do_alg_addsub_t_m2;
2432
2433 case alg_add_factor:
2434 case alg_sub_factor:
2435 goto do_alg_addsub_factor;
2436
2437 case alg_add_t2_m:
2438 goto do_alg_add_t2_m;
2439
2440 case alg_sub_t2_m:
2441 goto do_alg_sub_t2_m;
2442
2443 default:
2444 gcc_unreachable ();
2445 }
2446 }
2447 }
2448
2449 /* If we have a group of zero bits at the low-order part of T, try
2450 multiplying by the remaining bits and then doing a shift. */
2451
2452 if ((t & 1) == 0)
2453 {
2454 do_alg_shift:
2455 m = floor_log2 (t & -t); /* m = number of low zero bits */
2456 if (m < maxm)
2457 {
2458 q = t >> m;
2459 /* The function expand_shift will choose between a shift and
2460 a sequence of additions, so the observed cost is given as
2461 MIN (m * add_cost(speed, mode), shift_cost(speed, mode, m)). */
2462 op_cost = m * add_cost (speed, mode);
2463 if (shift_cost (speed, mode, m) < op_cost)
2464 op_cost = shift_cost (speed, mode, m);
2465 new_limit.cost = best_cost.cost - op_cost;
2466 new_limit.latency = best_cost.latency - op_cost;
2467 synth_mult (alg_in, q, &new_limit, mode);
2468
2469 alg_in->cost.cost += op_cost;
2470 alg_in->cost.latency += op_cost;
2471 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2472 {
2473 struct algorithm *x;
2474 best_cost = alg_in->cost;
2475 x = alg_in, alg_in = best_alg, best_alg = x;
2476 best_alg->log[best_alg->ops] = m;
2477 best_alg->op[best_alg->ops] = alg_shift;
2478 }
2479
2480 /* See if treating ORIG_T as a signed number yields a better
2481 sequence. Try this sequence only for a negative ORIG_T
2482 as it would be useless for a non-negative ORIG_T. */
2483 if ((HOST_WIDE_INT) orig_t < 0)
2484 {
2485 /* Shift ORIG_T as follows because a right shift of a
2486 negative-valued signed type is implementation
2487 defined. */
2488 q = ~(~orig_t >> m);
2489 /* The function expand_shift will choose between a shift
2490 and a sequence of additions, so the observed cost is
2491 given as MIN (m * add_cost(speed, mode),
2492 shift_cost(speed, mode, m)). */
2493 op_cost = m * add_cost (speed, mode);
2494 if (shift_cost (speed, mode, m) < op_cost)
2495 op_cost = shift_cost (speed, mode, m);
2496 new_limit.cost = best_cost.cost - op_cost;
2497 new_limit.latency = best_cost.latency - op_cost;
2498 synth_mult (alg_in, q, &new_limit, mode);
2499
2500 alg_in->cost.cost += op_cost;
2501 alg_in->cost.latency += op_cost;
2502 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2503 {
2504 struct algorithm *x;
2505 best_cost = alg_in->cost;
2506 x = alg_in, alg_in = best_alg, best_alg = x;
2507 best_alg->log[best_alg->ops] = m;
2508 best_alg->op[best_alg->ops] = alg_shift;
2509 }
2510 }
2511 }
2512 if (cache_hit)
2513 goto done;
2514 }
2515
2516 /* If we have an odd number, add or subtract one. */
2517 if ((t & 1) != 0)
2518 {
2519 unsigned HOST_WIDE_INT w;
2520
2521 do_alg_addsub_t_m2:
2522 for (w = 1; (w & t) != 0; w <<= 1)
2523 ;
2524 /* If T was -1, then W will be zero after the loop. This is another
2525 case where T ends with ...111. Handling this by multiplying by
2526 (T + 1) and subtracting 1 produces slightly better code and makes
2527 algorithm selection much faster than treating it like the ...0111 case
2528 below. */
2529 if (w == 0
2530 || (w > 2
2531 /* Reject the case where t is 3.
2532 Thus we prefer addition in that case. */
2533 && t != 3))
2534 {
2535 /* T ends with ...111. Multiply by (T + 1) and subtract 1. */
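	  /* E.g. x * 7 becomes (x << 3) - x.  */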
2536
2537 op_cost = add_cost (speed, mode);
2538 new_limit.cost = best_cost.cost - op_cost;
2539 new_limit.latency = best_cost.latency - op_cost;
2540 synth_mult (alg_in, t + 1, &new_limit, mode);
2541
2542 alg_in->cost.cost += op_cost;
2543 alg_in->cost.latency += op_cost;
2544 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2545 {
2546 struct algorithm *x;
2547 best_cost = alg_in->cost;
2548 x = alg_in, alg_in = best_alg, best_alg = x;
2549 best_alg->log[best_alg->ops] = 0;
2550 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2551 }
2552 }
2553 else
2554 {
2555 /* T ends with ...01 or ...011. Multiply by (T - 1) and add 1. */
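	  /* E.g. x * 5 becomes (x << 2) + x.  */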
2556
2557 op_cost = add_cost (speed, mode);
2558 new_limit.cost = best_cost.cost - op_cost;
2559 new_limit.latency = best_cost.latency - op_cost;
2560 synth_mult (alg_in, t - 1, &new_limit, mode);
2561
2562 alg_in->cost.cost += op_cost;
2563 alg_in->cost.latency += op_cost;
2564 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2565 {
2566 struct algorithm *x;
2567 best_cost = alg_in->cost;
2568 x = alg_in, alg_in = best_alg, best_alg = x;
2569 best_alg->log[best_alg->ops] = 0;
2570 best_alg->op[best_alg->ops] = alg_add_t_m2;
2571 }
2572 }
2573
2574 /* We may be able to calculate a * -7, a * -15, a * -31, etc
2575 quickly with a - a * n for some appropriate constant n. */
2576 m = exact_log2 (-orig_t + 1);
2577 if (m >= 0 && m < maxm)
2578 {
2579 op_cost = shiftsub1_cost (speed, mode, m);
2580 new_limit.cost = best_cost.cost - op_cost;
2581 new_limit.latency = best_cost.latency - op_cost;
2582 synth_mult (alg_in, (unsigned HOST_WIDE_INT) (-orig_t + 1) >> m,
2583 &new_limit, mode);
2584
2585 alg_in->cost.cost += op_cost;
2586 alg_in->cost.latency += op_cost;
2587 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2588 {
2589 struct algorithm *x;
2590 best_cost = alg_in->cost;
2591 x = alg_in, alg_in = best_alg, best_alg = x;
2592 best_alg->log[best_alg->ops] = m;
2593 best_alg->op[best_alg->ops] = alg_sub_t_m2;
2594 }
2595 }
2596
2597 if (cache_hit)
2598 goto done;
2599 }
2600
2601 /* Look for factors of t of the form
2602 t = q(2**m +- 1), 2 <= m <= floor(log2(t - 1)).
2603 If we find such a factor, we can multiply by t using an algorithm that
2604 multiplies by q, shift the result by m and add/subtract it to itself.
2605
2606 We search for large factors first and loop down, even if large factors
2607 are less probable than small; if we find a large factor we will find a
2608 good sequence quickly, and therefore be able to prune (by decreasing
2609 COST_LIMIT) the search. */
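  /* For example (illustrative): t = 75 factors as 5 * (2**4 - 1), so
     x * 75 can be computed as t1 = x * 5 (by recursion, (x << 2) + x)
     followed by (t1 << 4) - t1.  */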
2610
2611 do_alg_addsub_factor:
2612 for (m = floor_log2 (t - 1); m >= 2; m--)
2613 {
2614 unsigned HOST_WIDE_INT d;
2615
2616 d = ((unsigned HOST_WIDE_INT) 1 << m) + 1;
2617 if (t % d == 0 && t > d && m < maxm
2618 && (!cache_hit || cache_alg == alg_add_factor))
2619 {
2620 /* If the target has a cheap shift-and-add instruction use
2621 that in preference to a shift insn followed by an add insn.
2622 Assume that the shift-and-add is "atomic" with a latency
2623 equal to its cost, otherwise assume that on superscalar
2624 hardware the shift may be executed concurrently with the
2625 earlier steps in the algorithm. */
2626 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2627 if (shiftadd_cost (speed, mode, m) < op_cost)
2628 {
2629 op_cost = shiftadd_cost (speed, mode, m);
2630 op_latency = op_cost;
2631 }
2632 else
2633 op_latency = add_cost (speed, mode);
2634
2635 new_limit.cost = best_cost.cost - op_cost;
2636 new_limit.latency = best_cost.latency - op_latency;
2637 synth_mult (alg_in, t / d, &new_limit, mode);
2638
2639 alg_in->cost.cost += op_cost;
2640 alg_in->cost.latency += op_latency;
2641 if (alg_in->cost.latency < op_cost)
2642 alg_in->cost.latency = op_cost;
2643 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2644 {
2645 struct algorithm *x;
2646 best_cost = alg_in->cost;
2647 x = alg_in, alg_in = best_alg, best_alg = x;
2648 best_alg->log[best_alg->ops] = m;
2649 best_alg->op[best_alg->ops] = alg_add_factor;
2650 }
2651 /* Other factors will have been taken care of in the recursion. */
2652 break;
2653 }
2654
2655 d = ((unsigned HOST_WIDE_INT) 1 << m) - 1;
2656 if (t % d == 0 && t > d && m < maxm
2657 && (!cache_hit || cache_alg == alg_sub_factor))
2658 {
2659 /* If the target has a cheap shift-and-subtract insn use
2660 that in preference to a shift insn followed by a sub insn.
2661 Assume that the shift-and-sub is "atomic" with a latency
2662 equal to its cost, otherwise assume that on superscalar
2663 hardware the shift may be executed concurrently with the
2664 earlier steps in the algorithm. */
2665 op_cost = add_cost (speed, mode) + shift_cost (speed, mode, m);
2666 if (shiftsub0_cost (speed, mode, m) < op_cost)
2667 {
2668 op_cost = shiftsub0_cost (speed, mode, m);
2669 op_latency = op_cost;
2670 }
2671 else
2672 op_latency = add_cost (speed, mode);
2673
2674 new_limit.cost = best_cost.cost - op_cost;
2675 new_limit.latency = best_cost.latency - op_latency;
2676 synth_mult (alg_in, t / d, &new_limit, mode);
2677
2678 alg_in->cost.cost += op_cost;
2679 alg_in->cost.latency += op_latency;
2680 if (alg_in->cost.latency < op_cost)
2681 alg_in->cost.latency = op_cost;
2682 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2683 {
2684 struct algorithm *x;
2685 best_cost = alg_in->cost;
2686 x = alg_in, alg_in = best_alg, best_alg = x;
2687 best_alg->log[best_alg->ops] = m;
2688 best_alg->op[best_alg->ops] = alg_sub_factor;
2689 }
2690 break;
2691 }
2692 }
2693 if (cache_hit)
2694 goto done;
2695
2696 /* Try shift-and-add (load effective address) instructions,
2697 i.e. do a*3, a*5, a*9. */
2698 if ((t & 1) != 0)
2699 {
2700 do_alg_add_t2_m:
2701 q = t - 1;
2702 q = q & -q;
2703 m = exact_log2 (q);
2704 if (m >= 0 && m < maxm)
2705 {
2706 op_cost = shiftadd_cost (speed, mode, m);
2707 new_limit.cost = best_cost.cost - op_cost;
2708 new_limit.latency = best_cost.latency - op_cost;
2709 synth_mult (alg_in, (t - 1) >> m, &new_limit, mode);
2710
2711 alg_in->cost.cost += op_cost;
2712 alg_in->cost.latency += op_cost;
2713 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2714 {
2715 struct algorithm *x;
2716 best_cost = alg_in->cost;
2717 x = alg_in, alg_in = best_alg, best_alg = x;
2718 best_alg->log[best_alg->ops] = m;
2719 best_alg->op[best_alg->ops] = alg_add_t2_m;
2720 }
2721 }
2722 if (cache_hit)
2723 goto done;
2724
2725 do_alg_sub_t2_m:
2726 q = t + 1;
2727 q = q & -q;
2728 m = exact_log2 (q);
2729 if (m >= 0 && m < maxm)
2730 {
2731 op_cost = shiftsub0_cost (speed, mode, m);
2732 new_limit.cost = best_cost.cost - op_cost;
2733 new_limit.latency = best_cost.latency - op_cost;
2734 synth_mult (alg_in, (t + 1) >> m, &new_limit, mode);
2735
2736 alg_in->cost.cost += op_cost;
2737 alg_in->cost.latency += op_cost;
2738 if (CHEAPER_MULT_COST (&alg_in->cost, &best_cost))
2739 {
2740 struct algorithm *x;
2741 best_cost = alg_in->cost;
2742 x = alg_in, alg_in = best_alg, best_alg = x;
2743 best_alg->log[best_alg->ops] = m;
2744 best_alg->op[best_alg->ops] = alg_sub_t2_m;
2745 }
2746 }
2747 if (cache_hit)
2748 goto done;
2749 }
2750
2751 done:
2752 /* If best_cost has not decreased, we have not found any algorithm. */
2753 if (!CHEAPER_MULT_COST (&best_cost, cost_limit))
2754 {
2755 /* We failed to find an algorithm. Record alg_impossible for
2756 this case (that is, <T, MODE, COST_LIMIT>) so that next time
2757 we are asked to find an algorithm for T within the same or
2758 lower COST_LIMIT, we can immediately return to the
2759 caller. */
2760 entry_ptr->t = t;
2761 entry_ptr->mode = mode;
2762 entry_ptr->speed = speed;
2763 entry_ptr->alg = alg_impossible;
2764 entry_ptr->cost = *cost_limit;
2765 return;
2766 }
2767
2768 /* Cache the result. */
2769 if (!cache_hit)
2770 {
2771 entry_ptr->t = t;
2772 entry_ptr->mode = mode;
2773 entry_ptr->speed = speed;
2774 entry_ptr->alg = best_alg->op[best_alg->ops];
2775 entry_ptr->cost.cost = best_cost.cost;
2776 entry_ptr->cost.latency = best_cost.latency;
2777 }
2778
2779 /* If we are getting too long a sequence for `struct algorithm'
2780 to record, make this search fail. */
2781 if (best_alg->ops == MAX_BITS_PER_WORD)
2782 return;
2783
2784 /* Copy the algorithm from temporary space to the space at alg_out.
2785 We avoid using structure assignment because the majority of
2786 best_alg is normally undefined, and this is a critical function. */
2787 alg_out->ops = best_alg->ops + 1;
2788 alg_out->cost = best_cost;
2789 memcpy (alg_out->op, best_alg->op,
2790 alg_out->ops * sizeof *alg_out->op);
2791 memcpy (alg_out->log, best_alg->log,
2792 alg_out->ops * sizeof *alg_out->log);
2793 }
2794 \f
2795 /* Find the cheapest way of multiplying a value of mode MODE by VAL.
2796 Try three variations:
2797
2798 - a shift/add sequence based on VAL itself
2799 - a shift/add sequence based on -VAL, followed by a negation
2800 - a shift/add sequence based on VAL - 1, followed by an addition.
2801
2802 Return true if the cheapest of these cost less than MULT_COST,
2803 describing the algorithm in *ALG and final fixup in *VARIANT. */
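/* For instance (illustrative): for VAL = -7 the cheapest choice may be the
   sequence for 7, (x << 3) - x, followed by a negation (negate_variant);
   for VAL = 9 it may be the sequence for VAL - 1 = 8, a single shift,
   followed by adding the multiplicand (add_variant).  */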
2804
2805 static bool
2806 choose_mult_variant (enum machine_mode mode, HOST_WIDE_INT val,
2807 struct algorithm *alg, enum mult_variant *variant,
2808 int mult_cost)
2809 {
2810 struct algorithm alg2;
2811 struct mult_cost limit;
2812 int op_cost;
2813 bool speed = optimize_insn_for_speed_p ();
2814
2815 /* Fail quickly for impossible bounds. */
2816 if (mult_cost < 0)
2817 return false;
2818
2819 /* Ensure that mult_cost provides a reasonable upper bound.
2820 Any constant multiplication can be performed with less
2821 than 2 * bits additions. */
2822 op_cost = 2 * GET_MODE_UNIT_BITSIZE (mode) * add_cost (speed, mode);
2823 if (mult_cost > op_cost)
2824 mult_cost = op_cost;
2825
2826 *variant = basic_variant;
2827 limit.cost = mult_cost;
2828 limit.latency = mult_cost;
2829 synth_mult (alg, val, &limit, mode);
2830
2831 /* This works only if the inverted value actually fits in an
2832 `unsigned int' */
2833 if (HOST_BITS_PER_INT >= GET_MODE_UNIT_BITSIZE (mode))
2834 {
2835 op_cost = neg_cost(speed, mode);
2836 if (MULT_COST_LESS (&alg->cost, mult_cost))
2837 {
2838 limit.cost = alg->cost.cost - op_cost;
2839 limit.latency = alg->cost.latency - op_cost;
2840 }
2841 else
2842 {
2843 limit.cost = mult_cost - op_cost;
2844 limit.latency = mult_cost - op_cost;
2845 }
2846
2847 synth_mult (&alg2, -val, &limit, mode);
2848 alg2.cost.cost += op_cost;
2849 alg2.cost.latency += op_cost;
2850 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2851 *alg = alg2, *variant = negate_variant;
2852 }
2853
2854 /* This proves very useful for division-by-constant. */
2855 op_cost = add_cost (speed, mode);
2856 if (MULT_COST_LESS (&alg->cost, mult_cost))
2857 {
2858 limit.cost = alg->cost.cost - op_cost;
2859 limit.latency = alg->cost.latency - op_cost;
2860 }
2861 else
2862 {
2863 limit.cost = mult_cost - op_cost;
2864 limit.latency = mult_cost - op_cost;
2865 }
2866
2867 synth_mult (&alg2, val - 1, &limit, mode);
2868 alg2.cost.cost += op_cost;
2869 alg2.cost.latency += op_cost;
2870 if (CHEAPER_MULT_COST (&alg2.cost, &alg->cost))
2871 *alg = alg2, *variant = add_variant;
2872
2873 return MULT_COST_LESS (&alg->cost, mult_cost);
2874 }
2875
2876 /* A subroutine of expand_mult, used for constant multiplications.
2877 Multiply OP0 by VAL in mode MODE, storing the result in TARGET if
2878 convenient. Use the shift/add sequence described by ALG and apply
2879 the final fixup specified by VARIANT. */
2880
2881 static rtx
2882 expand_mult_const (enum machine_mode mode, rtx op0, HOST_WIDE_INT val,
2883 rtx target, const struct algorithm *alg,
2884 enum mult_variant variant)
2885 {
2886 HOST_WIDE_INT val_so_far;
2887 rtx insn, accum, tem;
2888 int opno;
2889 enum machine_mode nmode;
2890
2891 /* Avoid referencing memory over and over and invalid sharing
2892 on SUBREGs. */
2893 op0 = force_reg (mode, op0);
2894
2895 /* ACCUM starts out either as OP0 or as a zero, depending on
2896 the first operation. */
2897
2898 if (alg->op[0] == alg_zero)
2899 {
2900 accum = copy_to_mode_reg (mode, CONST0_RTX (mode));
2901 val_so_far = 0;
2902 }
2903 else if (alg->op[0] == alg_m)
2904 {
2905 accum = copy_to_mode_reg (mode, op0);
2906 val_so_far = 1;
2907 }
2908 else
2909 gcc_unreachable ();
2910
2911 for (opno = 1; opno < alg->ops; opno++)
2912 {
2913 int log = alg->log[opno];
2914 rtx shift_subtarget = optimize ? 0 : accum;
2915 rtx add_target
2916 = (opno == alg->ops - 1 && target != 0 && variant != add_variant
2917 && !optimize)
2918 ? target : 0;
2919 rtx accum_target = optimize ? 0 : accum;
2920 rtx accum_inner;
2921
2922 switch (alg->op[opno])
2923 {
2924 case alg_shift:
2925 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2926 /* REG_EQUAL note will be attached to the following insn. */
2927 emit_move_insn (accum, tem);
2928 val_so_far <<= log;
2929 break;
2930
2931 case alg_add_t_m2:
2932 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2933 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2934 add_target ? add_target : accum_target);
2935 val_so_far += (HOST_WIDE_INT) 1 << log;
2936 break;
2937
2938 case alg_sub_t_m2:
2939 tem = expand_shift (LSHIFT_EXPR, mode, op0, log, NULL_RTX, 0);
2940 accum = force_operand (gen_rtx_MINUS (mode, accum, tem),
2941 add_target ? add_target : accum_target);
2942 val_so_far -= (HOST_WIDE_INT) 1 << log;
2943 break;
2944
2945 case alg_add_t2_m:
2946 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2947 log, shift_subtarget, 0);
2948 accum = force_operand (gen_rtx_PLUS (mode, accum, op0),
2949 add_target ? add_target : accum_target);
2950 val_so_far = (val_so_far << log) + 1;
2951 break;
2952
2953 case alg_sub_t2_m:
2954 accum = expand_shift (LSHIFT_EXPR, mode, accum,
2955 log, shift_subtarget, 0);
2956 accum = force_operand (gen_rtx_MINUS (mode, accum, op0),
2957 add_target ? add_target : accum_target);
2958 val_so_far = (val_so_far << log) - 1;
2959 break;
2960
2961 case alg_add_factor:
2962 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2963 accum = force_operand (gen_rtx_PLUS (mode, accum, tem),
2964 add_target ? add_target : accum_target);
2965 val_so_far += val_so_far << log;
2966 break;
2967
2968 case alg_sub_factor:
2969 tem = expand_shift (LSHIFT_EXPR, mode, accum, log, NULL_RTX, 0);
2970 accum = force_operand (gen_rtx_MINUS (mode, tem, accum),
2971 (add_target
2972 ? add_target : (optimize ? 0 : tem)));
2973 val_so_far = (val_so_far << log) - val_so_far;
2974 break;
2975
2976 default:
2977 gcc_unreachable ();
2978 }
2979
2980 if (SCALAR_INT_MODE_P (mode))
2981 {
2982 /* Write a REG_EQUAL note on the last insn so that we can cse
2983 multiplication sequences. Note that if ACCUM is a SUBREG,
2984 we've set the inner register and must properly indicate that. */
2985 tem = op0, nmode = mode;
2986 accum_inner = accum;
2987 if (GET_CODE (accum) == SUBREG)
2988 {
2989 accum_inner = SUBREG_REG (accum);
2990 nmode = GET_MODE (accum_inner);
2991 tem = gen_lowpart (nmode, op0);
2992 }
2993
2994 insn = get_last_insn ();
2995 set_dst_reg_note (insn, REG_EQUAL,
2996 gen_rtx_MULT (nmode, tem, GEN_INT (val_so_far)),
2997 accum_inner);
2998 }
2999 }
3000
3001 if (variant == negate_variant)
3002 {
3003 val_so_far = -val_so_far;
3004 accum = expand_unop (mode, neg_optab, accum, target, 0);
3005 }
3006 else if (variant == add_variant)
3007 {
3008 val_so_far = val_so_far + 1;
3009 accum = force_operand (gen_rtx_PLUS (mode, accum, op0), target);
3010 }
3011
3012 /* Compare only the bits of val and val_so_far that are significant
3013 in the result mode, to avoid sign-/zero-extension confusion. */
3014 nmode = GET_MODE_INNER (mode);
3015 if (nmode == VOIDmode)
3016 nmode = mode;
3017 val &= GET_MODE_MASK (nmode);
3018 val_so_far &= GET_MODE_MASK (nmode);
3019 gcc_assert (val == val_so_far);
3020
3021 return accum;
3022 }
3023
3024 /* Perform a multiplication and return an rtx for the result.
3025 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3026 TARGET is a suggestion for where to store the result (an rtx).
3027
3028 We check specially for a constant integer as OP1.
3029 If you want this check for OP0 as well, then before calling
3030 you should swap the two operands if OP0 would be constant. */
3031
3032 rtx
3033 expand_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3034 int unsignedp)
3035 {
3036 enum mult_variant variant;
3037 struct algorithm algorithm;
3038 rtx scalar_op1;
3039 int max_cost;
3040 bool speed = optimize_insn_for_speed_p ();
3041 bool do_trapv = flag_trapv && SCALAR_INT_MODE_P (mode) && !unsignedp;
3042
3043 if (CONSTANT_P (op0))
3044 {
3045 rtx temp = op0;
3046 op0 = op1;
3047 op1 = temp;
3048 }
3049
3050 /* For vectors, there are several simplifications that can be made if
3051 all elements of the vector constant are identical. */
3052 scalar_op1 = op1;
3053 if (GET_CODE (op1) == CONST_VECTOR)
3054 {
3055 int i, n = CONST_VECTOR_NUNITS (op1);
3056 scalar_op1 = CONST_VECTOR_ELT (op1, 0);
3057 for (i = 1; i < n; ++i)
3058 if (!rtx_equal_p (scalar_op1, CONST_VECTOR_ELT (op1, i)))
3059 goto skip_scalar;
3060 }
3061
3062 if (INTEGRAL_MODE_P (mode))
3063 {
3064 rtx fake_reg;
3065 HOST_WIDE_INT coeff;
3066 bool is_neg;
3067 int mode_bitsize;
3068
3069 if (op1 == CONST0_RTX (mode))
3070 return op1;
3071 if (op1 == CONST1_RTX (mode))
3072 return op0;
3073 if (op1 == CONSTM1_RTX (mode))
3074 return expand_unop (mode, do_trapv ? negv_optab : neg_optab,
3075 op0, target, 0);
3076
3077 if (do_trapv)
3078 goto skip_synth;
3079
3080 /* These are the operations that are potentially turned into
3081 a sequence of shifts and additions. */
3082 mode_bitsize = GET_MODE_UNIT_BITSIZE (mode);
3083
3084 /* synth_mult does an `unsigned int' multiply. As long as the mode is
3085 less than or equal in size to `unsigned int' this doesn't matter.
3086 If the mode is larger than `unsigned int', then synth_mult works
3087 only if the constant value exactly fits in an `unsigned int' without
3088 any truncation. This means that multiplying by negative values does
3089 not work; results are off by 2^32 on a 32 bit machine. */
3090
3091 if (CONST_INT_P (scalar_op1))
3092 {
3093 coeff = INTVAL (scalar_op1);
3094 is_neg = coeff < 0;
3095 }
3096 else if (CONST_DOUBLE_AS_INT_P (scalar_op1))
3097 {
3098 /* If we are multiplying in DImode, it may still be a win
3099 to try to work with shifts and adds. */
3100 if (CONST_DOUBLE_HIGH (scalar_op1) == 0
3101 && CONST_DOUBLE_LOW (scalar_op1) > 0)
3102 {
3103 coeff = CONST_DOUBLE_LOW (scalar_op1);
3104 is_neg = false;
3105 }
3106 else if (CONST_DOUBLE_LOW (scalar_op1) == 0)
3107 {
3108 coeff = CONST_DOUBLE_HIGH (scalar_op1);
3109 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3110 {
3111 int shift = floor_log2 (coeff) + HOST_BITS_PER_WIDE_INT;
3112 if (shift < HOST_BITS_PER_DOUBLE_INT - 1
3113 || mode_bitsize <= HOST_BITS_PER_DOUBLE_INT)
3114 return expand_shift (LSHIFT_EXPR, mode, op0,
3115 shift, target, unsignedp);
3116 }
3117 goto skip_synth;
3118 }
3119 else
3120 goto skip_synth;
3121 }
3122 else
3123 goto skip_synth;
3124
3125 /* We used to test optimize here, on the grounds that it's better to
3126 produce a smaller program when -O is not used. But this causes
3127 such a terrible slowdown sometimes that it seems better to always
3128 use synth_mult. */
3129
3130 /* Special case powers of two. */
3131 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3132 return expand_shift (LSHIFT_EXPR, mode, op0,
3133 floor_log2 (coeff), target, unsignedp);
3134
3135 fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3136
3137 /* Attempt to handle multiplication of DImode values by negative
3138 coefficients, by performing the multiplication by a positive
3139 multiplier and then negating the result. */
3140 if (is_neg && mode_bitsize > HOST_BITS_PER_WIDE_INT)
3141 {
3142 /* It's safe to use -coeff even for INT_MIN, as the
3143 result is interpreted as an unsigned coefficient.
3144 Exclude cost of op0 from max_cost to match the cost
3145 calculation of the synth_mult. */
3146 max_cost = (set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed)
3147 - neg_cost(speed, mode));
3148 if (max_cost > 0
3149 && choose_mult_variant (mode, -coeff, &algorithm,
3150 &variant, max_cost))
3151 {
3152 rtx temp = expand_mult_const (mode, op0, -coeff, NULL_RTX,
3153 &algorithm, variant);
3154 return expand_unop (mode, neg_optab, temp, target, 0);
3155 }
3156 goto skip_synth;
3157 }
3158
3159 /* Exclude cost of op0 from max_cost to match the cost
3160 calculation of the synth_mult. */
3161 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, op1), speed);
3162 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3163 return expand_mult_const (mode, op0, coeff, target,
3164 &algorithm, variant);
3165 }
3166 skip_synth:
3167
3168 /* Expand x*2.0 as x+x. */
3169 if (CONST_DOUBLE_AS_FLOAT_P (scalar_op1))
3170 {
3171 REAL_VALUE_TYPE d;
3172 REAL_VALUE_FROM_CONST_DOUBLE (d, scalar_op1);
3173
3174 if (REAL_VALUES_EQUAL (d, dconst2))
3175 {
3176 op0 = force_reg (GET_MODE (op0), op0);
3177 return expand_binop (mode, add_optab, op0, op0,
3178 target, unsignedp, OPTAB_LIB_WIDEN);
3179 }
3180 }
3181 skip_scalar:
3182
3183 /* This used to use umul_optab if unsigned, but for non-widening multiply
3184 there is no difference between signed and unsigned. */
3185 op0 = expand_binop (mode, do_trapv ? smulv_optab : smul_optab,
3186 op0, op1, target, unsignedp, OPTAB_LIB_WIDEN);
3187 gcc_assert (op0);
3188 return op0;
3189 }
3190
3191 /* Return a cost estimate for multiplying a register by the given
3192 COEFFicient in the given MODE and SPEED. */
3193
3194 int
3195 mult_by_coeff_cost (HOST_WIDE_INT coeff, enum machine_mode mode, bool speed)
3196 {
3197 int max_cost;
3198 struct algorithm algorithm;
3199 enum mult_variant variant;
3200
3201 rtx fake_reg = gen_raw_REG (mode, LAST_VIRTUAL_REGISTER + 1);
3202 max_cost = set_src_cost (gen_rtx_MULT (mode, fake_reg, fake_reg), speed);
3203 if (choose_mult_variant (mode, coeff, &algorithm, &variant, max_cost))
3204 return algorithm.cost.cost;
3205 else
3206 return max_cost;
3207 }
3208
3209 /* Perform a widening multiplication and return an rtx for the result.
3210 MODE is mode of value; OP0 and OP1 are what to multiply (rtx's);
3211 TARGET is a suggestion for where to store the result (an rtx).
3212 THIS_OPTAB is the optab we should use, it must be either umul_widen_optab
3213 or smul_widen_optab.
3214
3215 We check specially for a constant integer as OP1, comparing the
3216 cost of a widening multiply against the cost of a sequence of shifts
3217 and adds. */
3218
3219 rtx
3220 expand_widening_mult (enum machine_mode mode, rtx op0, rtx op1, rtx target,
3221 int unsignedp, optab this_optab)
3222 {
3223 bool speed = optimize_insn_for_speed_p ();
3224 rtx cop1;
3225
3226 if (CONST_INT_P (op1)
3227 && GET_MODE (op0) != VOIDmode
3228 && (cop1 = convert_modes (mode, GET_MODE (op0), op1,
3229 this_optab == umul_widen_optab))
3230 && CONST_INT_P (cop1)
3231 && (INTVAL (cop1) >= 0
3232 || HWI_COMPUTABLE_MODE_P (mode)))
3233 {
3234 HOST_WIDE_INT coeff = INTVAL (cop1);
3235 int max_cost;
3236 enum mult_variant variant;
3237 struct algorithm algorithm;
3238
3239 /* Special case powers of two. */
3240 if (EXACT_POWER_OF_2_OR_ZERO_P (coeff))
3241 {
3242 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3243 return expand_shift (LSHIFT_EXPR, mode, op0,
3244 floor_log2 (coeff), target, unsignedp);
3245 }
3246
3247 /* Exclude cost of op0 from max_cost to match the cost
3248 calculation of the synth_mult. */
3249 max_cost = mul_widen_cost (speed, mode);
3250 if (choose_mult_variant (mode, coeff, &algorithm, &variant,
3251 max_cost))
3252 {
3253 op0 = convert_to_mode (mode, op0, this_optab == umul_widen_optab);
3254 return expand_mult_const (mode, op0, coeff, target,
3255 &algorithm, variant);
3256 }
3257 }
3258 return expand_binop (mode, this_optab, op0, op1, target,
3259 unsignedp, OPTAB_LIB_WIDEN);
3260 }
3261 \f
3262 /* Choose a minimal N + 1 bit approximation to 1/D that can be used to
3263 replace division by D, and put the least significant N bits of the result
3264 in *MULTIPLIER_PTR and return the most significant bit.
3265
3266 The width of operations is N (should be <= HOST_BITS_PER_WIDE_INT), the
3267 needed precision is in PRECISION (should be <= N).
3268
3269 PRECISION should be as small as possible so this function can choose
3270 multiplier more freely.
3271
3272 The rounded-up logarithm of D is placed in *lgup_ptr. A shift count that
3273 is to be used for a final right shift is placed in *POST_SHIFT_PTR.
3274
3275 Using this function, x/D will be equal to (x * m) >> (*POST_SHIFT_PTR),
3276 where m is the full HOST_BITS_PER_WIDE_INT + 1 bit multiplier. */
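/* Worked example (illustrative, with N = PRECISION = 32): for D = 5 this
   returns 0 and sets *MULTIPLIER_PTR = 0xcccccccd, *POST_SHIFT_PTR = 2 and
   *LGUP_PTR = 3, so that x / 5 == ((x * 0xcccccccd) >> 32) >> 2 for every
   32-bit unsigned x.  */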
3277
3278 unsigned HOST_WIDE_INT
3279 choose_multiplier (unsigned HOST_WIDE_INT d, int n, int precision,
3280 unsigned HOST_WIDE_INT *multiplier_ptr,
3281 int *post_shift_ptr, int *lgup_ptr)
3282 {
3283 double_int mhigh, mlow;
3284 int lgup, post_shift;
3285 int pow, pow2;
3286
3287 /* lgup = ceil(log2(divisor)); */
3288 lgup = ceil_log2 (d);
3289
3290 gcc_assert (lgup <= n);
3291
3292 pow = n + lgup;
3293 pow2 = n + lgup - precision;
3294
3295 /* We could handle this with some effort, but this case is much
3296 better handled directly with a scc insn, so rely on caller using
3297 that. */
3298 gcc_assert (pow != HOST_BITS_PER_DOUBLE_INT);
3299
3300 /* mlow = 2^(N + lgup)/d */
3301 double_int val = double_int_zero.set_bit (pow);
3302 mlow = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3303
3304 /* mhigh = (2^(N + lgup) + 2^(N + lgup - precision))/d */
3305 val |= double_int_zero.set_bit (pow2);
3306 mhigh = val.div (double_int::from_uhwi (d), true, TRUNC_DIV_EXPR);
3307
3308 gcc_assert (!mhigh.high || val.high - d < d);
3309 gcc_assert (mhigh.high <= 1 && mlow.high <= 1);
3310 /* Assert that mlow < mhigh. */
3311 gcc_assert (mlow.ult (mhigh));
3312
3313 /* If precision == N, then mlow, mhigh exceed 2^N
3314 (but they do not exceed 2^(N+1)). */
3315
3316 /* Reduce to lowest terms. */
3317 for (post_shift = lgup; post_shift > 0; post_shift--)
3318 {
3319 int shft = HOST_BITS_PER_WIDE_INT - 1;
3320 unsigned HOST_WIDE_INT ml_lo = (mlow.high << shft) | (mlow.low >> 1);
3321 unsigned HOST_WIDE_INT mh_lo = (mhigh.high << shft) | (mhigh.low >> 1);
3322 if (ml_lo >= mh_lo)
3323 break;
3324
3325 mlow = double_int::from_uhwi (ml_lo);
3326 mhigh = double_int::from_uhwi (mh_lo);
3327 }
3328
3329 *post_shift_ptr = post_shift;
3330 *lgup_ptr = lgup;
3331 if (n < HOST_BITS_PER_WIDE_INT)
3332 {
3333 unsigned HOST_WIDE_INT mask = ((unsigned HOST_WIDE_INT) 1 << n) - 1;
3334 *multiplier_ptr = mhigh.low & mask;
3335 return mhigh.low >= mask;
3336 }
3337 else
3338 {
3339 *multiplier_ptr = mhigh.low;
3340 return mhigh.high;
3341 }
3342 }
3343
3344 /* Compute the inverse of X mod 2**n, i.e., find Y such that X * Y is
3345 congruent to 1 (mod 2**N). */
3346
3347 static unsigned HOST_WIDE_INT
3348 invert_mod2n (unsigned HOST_WIDE_INT x, int n)
3349 {
3350 /* Solve x*y == 1 (mod 2^n), where x is odd. Return y. */
3351
3352 /* The algorithm notes that the choice y = x satisfies
3353 x*y == 1 mod 2^3, since x is assumed odd.
3354 Each iteration doubles the number of bits of significance in y. */
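  /* Worked example (illustrative): for x = 3 and n = 8, y starts at 3
     (correct mod 2^3), becomes 235 after one iteration (correct mod 2^6)
     and 171 after the second (3 * 171 == 513 == 2 * 256 + 1), which is
     the inverse of 3 mod 2^8.  */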
3355
3356 unsigned HOST_WIDE_INT mask;
3357 unsigned HOST_WIDE_INT y = x;
3358 int nbit = 3;
3359
3360 mask = (n == HOST_BITS_PER_WIDE_INT
3361 ? ~(unsigned HOST_WIDE_INT) 0
3362 : ((unsigned HOST_WIDE_INT) 1 << n) - 1);
3363
3364 while (nbit < n)
3365 {
3366 y = y * (2 - x*y) & mask; /* Modulo 2^N */
3367 nbit *= 2;
3368 }
3369 return y;
3370 }
3371
3372 /* Emit code to adjust ADJ_OPERAND after a multiplication of OP0 and OP1
3373    with the wrong signedness flavor.  ADJ_OPERAND is already the high half
3374    of the product OP0 x OP1.  If UNSIGNEDP is nonzero, adjust the signed
3375    product to become unsigned; if UNSIGNEDP is zero, adjust the unsigned
3376    product to become signed.
3377
3378 The result is put in TARGET if that is convenient.
3379
3380 MODE is the mode of operation. */
3381
3382 rtx
3383 expand_mult_highpart_adjust (enum machine_mode mode, rtx adj_operand, rtx op0,
3384 rtx op1, rtx target, int unsignedp)
3385 {
3386 rtx tem;
3387 enum rtx_code adj_code = unsignedp ? PLUS : MINUS;
3388
3389 tem = expand_shift (RSHIFT_EXPR, mode, op0,
3390 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3391 tem = expand_and (mode, tem, op1, NULL_RTX);
3392 adj_operand
3393 = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3394 adj_operand);
3395
3396 tem = expand_shift (RSHIFT_EXPR, mode, op1,
3397 GET_MODE_BITSIZE (mode) - 1, NULL_RTX, 0);
3398 tem = expand_and (mode, tem, op0, NULL_RTX);
3399 target = force_operand (gen_rtx_fmt_ee (adj_code, mode, adj_operand, tem),
3400 target);
3401
3402 return target;
3403 }
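
/* Illustrative sketch, not part of GCC, of the identity behind the
   adjustment above, written out at a fixed 32-bit width: reinterpreting
   an N-bit operand as unsigned adds 2^N when its sign bit is set, so
   modulo 2^N the unsigned high half exceeds the signed high half by
   (op0 < 0 ? op1 : 0) + (op1 < 0 ? op0 : 0), and conversely the signed
   high half is obtained by subtracting those terms.  The hypothetical
   helper assumes the <stdint.h> fixed-width types and an arithmetic
   right shift of negative signed values.  */
#if 0
static uint32_t
uhigh32_from_shigh32 (int32_t a, int32_t b)
{
  /* Signed high half of the 32x32->64 product.  */
  uint32_t adj = (uint32_t) (((int64_t) a * b) >> 32);

  /* Add B when A is negative and A when B is negative, as the code
     above does with a shift, an AND and an add/subtract per operand.  */
  adj += ((uint32_t) (a >> 31)) & (uint32_t) b;
  adj += ((uint32_t) (b >> 31)) & (uint32_t) a;

  /* ADJ now equals the unsigned high half
     (uint32_t) (((uint64_t) (uint32_t) a * (uint32_t) b) >> 32).  */
  return adj;
}
#endif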
3404
3405 /* Subroutine of expmed_mult_highpart. Return the MODE high part of OP. */
3406
3407 static rtx
3408 extract_high_half (enum machine_mode mode, rtx op)
3409 {
3410 enum machine_mode wider_mode;
3411
3412 if (mode == word_mode)
3413 return gen_highpart (mode, op);
3414
3415 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3416
3417 wider_mode = GET_MODE_WIDER_MODE (mode);
3418 op = expand_shift (RSHIFT_EXPR, wider_mode, op,
3419 GET_MODE_BITSIZE (mode), 0, 1);
3420 return convert_modes (mode, wider_mode, op, 0);
3421 }
3422
3423 /* Like expmed_mult_highpart, but only consider using a multiplication
3424 optab. OP1 is an rtx for the constant operand. */
3425
3426 static rtx
3427 expmed_mult_highpart_optab (enum machine_mode mode, rtx op0, rtx op1,
3428 rtx target, int unsignedp, int max_cost)
3429 {
3430 rtx narrow_op1 = gen_int_mode (INTVAL (op1), mode);
3431 enum machine_mode wider_mode;
3432 optab moptab;
3433 rtx tem;
3434 int size;
3435 bool speed = optimize_insn_for_speed_p ();
3436
3437 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3438
3439 wider_mode = GET_MODE_WIDER_MODE (mode);
3440 size = GET_MODE_BITSIZE (mode);
3441
3442 /* Firstly, try using a multiplication insn that only generates the needed
3443 high part of the product, and in the sign flavor of unsignedp. */
3444 if (mul_highpart_cost (speed, mode) < max_cost)
3445 {
3446 moptab = unsignedp ? umul_highpart_optab : smul_highpart_optab;
3447 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3448 unsignedp, OPTAB_DIRECT);
3449 if (tem)
3450 return tem;
3451 }
3452
3453 /* Secondly, same as above, but use sign flavor opposite of unsignedp.
3454 Need to adjust the result after the multiplication. */
3455 if (size - 1 < BITS_PER_WORD
3456 && (mul_highpart_cost (speed, mode)
3457 + 2 * shift_cost (speed, mode, size-1)
3458 + 4 * add_cost (speed, mode) < max_cost))
3459 {
3460 moptab = unsignedp ? smul_highpart_optab : umul_highpart_optab;
3461 tem = expand_binop (mode, moptab, op0, narrow_op1, target,
3462 unsignedp, OPTAB_DIRECT);
3463 if (tem)
3464 /* We used the wrong signedness. Adjust the result. */
3465 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3466 tem, unsignedp);
3467 }
3468
3469 /* Try widening multiplication. */
3470 moptab = unsignedp ? umul_widen_optab : smul_widen_optab;
3471 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3472 && mul_widen_cost (speed, wider_mode) < max_cost)
3473 {
3474 tem = expand_binop (wider_mode, moptab, op0, narrow_op1, 0,
3475 unsignedp, OPTAB_WIDEN);
3476 if (tem)
3477 return extract_high_half (mode, tem);
3478 }
3479
3480   /* Try widening the mode and performing a non-widening multiplication.  */
3481 if (optab_handler (smul_optab, wider_mode) != CODE_FOR_nothing
3482 && size - 1 < BITS_PER_WORD
3483 && (mul_cost (speed, wider_mode) + shift_cost (speed, mode, size-1)
3484 < max_cost))
3485 {
3486 rtx insns, wop0, wop1;
3487
3488 /* We need to widen the operands, for example to ensure the
3489 constant multiplier is correctly sign or zero extended.
3490          Use a sequence to clean up any instructions emitted by
3491 the conversions if things don't work out. */
3492 start_sequence ();
3493 wop0 = convert_modes (wider_mode, mode, op0, unsignedp);
3494 wop1 = convert_modes (wider_mode, mode, op1, unsignedp);
3495 tem = expand_binop (wider_mode, smul_optab, wop0, wop1, 0,
3496 unsignedp, OPTAB_WIDEN);
3497 insns = get_insns ();
3498 end_sequence ();
3499
3500 if (tem)
3501 {
3502 emit_insn (insns);
3503 return extract_high_half (mode, tem);
3504 }
3505 }
3506
3507 /* Try widening multiplication of opposite signedness, and adjust. */
3508 moptab = unsignedp ? smul_widen_optab : umul_widen_optab;
3509 if (widening_optab_handler (moptab, wider_mode, mode) != CODE_FOR_nothing
3510 && size - 1 < BITS_PER_WORD
3511 && (mul_widen_cost (speed, wider_mode)
3512 + 2 * shift_cost (speed, mode, size-1)
3513 + 4 * add_cost (speed, mode) < max_cost))
3514 {
3515 tem = expand_binop (wider_mode, moptab, op0, narrow_op1,
3516 NULL_RTX, ! unsignedp, OPTAB_WIDEN);
3517 if (tem != 0)
3518 {
3519 tem = extract_high_half (mode, tem);
3520 /* We used the wrong signedness. Adjust the result. */
3521 return expand_mult_highpart_adjust (mode, tem, op0, narrow_op1,
3522 target, unsignedp);
3523 }
3524 }
3525
3526 return 0;
3527 }
3528
3529 /* Emit code to multiply OP0 and OP1 (where OP1 is an integer constant),
3530 putting the high half of the result in TARGET if that is convenient,
3531    and return where the result is.  If the operation cannot be performed,
3532 0 is returned.
3533
3534 MODE is the mode of operation and result.
3535
3536 UNSIGNEDP nonzero means unsigned multiply.
3537
3538 MAX_COST is the total allowed cost for the expanded RTL. */
3539
3540 static rtx
3541 expmed_mult_highpart (enum machine_mode mode, rtx op0, rtx op1,
3542 rtx target, int unsignedp, int max_cost)
3543 {
3544 enum machine_mode wider_mode = GET_MODE_WIDER_MODE (mode);
3545 unsigned HOST_WIDE_INT cnst1;
3546 int extra_cost;
3547 bool sign_adjust = false;
3548 enum mult_variant variant;
3549 struct algorithm alg;
3550 rtx tem;
3551 bool speed = optimize_insn_for_speed_p ();
3552
3553 gcc_assert (!SCALAR_FLOAT_MODE_P (mode));
3554   /* We can't support modes wider than HOST_BITS_PER_WIDE_INT.  */
3555 gcc_assert (HWI_COMPUTABLE_MODE_P (mode));
3556
3557 cnst1 = INTVAL (op1) & GET_MODE_MASK (mode);
3558
3559 /* We can't optimize modes wider than BITS_PER_WORD.
3560 ??? We might be able to perform double-word arithmetic if
3561 mode == word_mode, however all the cost calculations in
3562 synth_mult etc. assume single-word operations. */
3563 if (GET_MODE_BITSIZE (wider_mode) > BITS_PER_WORD)
3564 return expmed_mult_highpart_optab (mode, op0, op1, target,
3565 unsignedp, max_cost);
3566
3567 extra_cost = shift_cost (speed, mode, GET_MODE_BITSIZE (mode) - 1);
3568
3569 /* Check whether we try to multiply by a negative constant. */
3570 if (!unsignedp && ((cnst1 >> (GET_MODE_BITSIZE (mode) - 1)) & 1))
3571 {
3572 sign_adjust = true;
3573 extra_cost += add_cost (speed, mode);
3574 }
3575
3576 /* See whether shift/add multiplication is cheap enough. */
3577 if (choose_mult_variant (wider_mode, cnst1, &alg, &variant,
3578 max_cost - extra_cost))
3579 {
3580 /* See whether the specialized multiplication optabs are
3581 cheaper than the shift/add version. */
3582 tem = expmed_mult_highpart_optab (mode, op0, op1, target, unsignedp,
3583 alg.cost.cost + extra_cost);
3584 if (tem)
3585 return tem;
3586
3587 tem = convert_to_mode (wider_mode, op0, unsignedp);
3588 tem = expand_mult_const (wider_mode, tem, cnst1, 0, &alg, variant);
3589 tem = extract_high_half (mode, tem);
3590
3591 /* Adjust result for signedness. */
3592 if (sign_adjust)
3593 tem = force_operand (gen_rtx_MINUS (mode, tem, op0), tem);
3594
3595 return tem;
3596 }
3597 return expmed_mult_highpart_optab (mode, op0, op1, target,
3598 unsignedp, max_cost);
3599 }
3600
3601
3602 /* Expand signed modulus of OP0 by a power of two D in mode MODE. */
3603
3604 static rtx
3605 expand_smod_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3606 {
3607 unsigned HOST_WIDE_INT masklow, maskhigh;
3608 rtx result, temp, shift, label;
3609 int logd;
3610
3611 logd = floor_log2 (d);
3612 result = gen_reg_rtx (mode);
3613
3614 /* Avoid conditional branches when they're expensive. */
3615 if (BRANCH_COST (optimize_insn_for_speed_p (), false) >= 2
3616 && optimize_insn_for_speed_p ())
3617 {
3618 rtx signmask = emit_store_flag (result, LT, op0, const0_rtx,
3619 mode, 0, -1);
3620 if (signmask)
3621 {
3622 signmask = force_reg (mode, signmask);
3623 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3624 shift = GEN_INT (GET_MODE_BITSIZE (mode) - logd);
3625
3626 /* Use the rtx_cost of a LSHIFTRT instruction to determine
3627 which instruction sequence to use. If logical right shifts
3628             are expensive then use 2 XORs, 2 SUBs and an AND, otherwise
3629 use a LSHIFTRT, 1 ADD, 1 SUB and an AND. */
3630
3631 temp = gen_rtx_LSHIFTRT (mode, result, shift);
3632 if (optab_handler (lshr_optab, mode) == CODE_FOR_nothing
3633 || (set_src_cost (temp, optimize_insn_for_speed_p ())
3634 > COSTS_N_INSNS (2)))
3635 {
3636 temp = expand_binop (mode, xor_optab, op0, signmask,
3637 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3638 temp = expand_binop (mode, sub_optab, temp, signmask,
3639 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3640 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3641 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3642 temp = expand_binop (mode, xor_optab, temp, signmask,
3643 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3644 temp = expand_binop (mode, sub_optab, temp, signmask,
3645 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3646 }
3647 else
3648 {
3649 signmask = expand_binop (mode, lshr_optab, signmask, shift,
3650 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3651 signmask = force_reg (mode, signmask);
3652
3653 temp = expand_binop (mode, add_optab, op0, signmask,
3654 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3655 temp = expand_binop (mode, and_optab, temp, GEN_INT (masklow),
3656 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3657 temp = expand_binop (mode, sub_optab, temp, signmask,
3658 NULL_RTX, 1, OPTAB_LIB_WIDEN);
3659 }
3660 return temp;
3661 }
3662 }
3663
3664 /* Mask contains the mode's signbit and the significant bits of the
3665 modulus. By including the signbit in the operation, many targets
3666 can avoid an explicit compare operation in the following comparison
3667 against zero. */
3668
3669 masklow = ((HOST_WIDE_INT) 1 << logd) - 1;
3670 if (GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
3671 {
3672 masklow |= (HOST_WIDE_INT) -1 << (GET_MODE_BITSIZE (mode) - 1);
3673 maskhigh = -1;
3674 }
3675 else
3676 maskhigh = (HOST_WIDE_INT) -1
3677 << (GET_MODE_BITSIZE (mode) - HOST_BITS_PER_WIDE_INT - 1);
3678
3679 temp = expand_binop (mode, and_optab, op0,
3680 immed_double_const (masklow, maskhigh, mode),
3681 result, 1, OPTAB_LIB_WIDEN);
3682 if (temp != result)
3683 emit_move_insn (result, temp);
3684
3685 label = gen_label_rtx ();
3686 do_cmp_and_jump (result, const0_rtx, GE, mode, label);
3687
3688 temp = expand_binop (mode, sub_optab, result, const1_rtx, result,
3689 0, OPTAB_LIB_WIDEN);
3690 masklow = (HOST_WIDE_INT) -1 << logd;
3691 maskhigh = -1;
3692 temp = expand_binop (mode, ior_optab, temp,
3693 immed_double_const (masklow, maskhigh, mode),
3694 result, 1, OPTAB_LIB_WIDEN);
3695 temp = expand_binop (mode, add_optab, temp, const1_rtx, result,
3696 0, OPTAB_LIB_WIDEN);
3697 if (temp != result)
3698 emit_move_insn (result, temp);
3699 emit_label (label);
3700 return result;
3701 }
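
/* Illustrative sketch, not part of GCC, of the cheaper of the two
   branch-free sequences above (logical shift, add, AND, subtract) at a
   fixed 32-bit width, computing the truncating C remainder x % 8.  The
   hypothetical helper assumes the <stdint.h> fixed-width types and an
   arithmetic right shift of negative signed values.  */
#if 0
static int32_t
smod8 (int32_t x)
{
  /* All ones when X is negative, zero otherwise.  */
  uint32_t signmask = (uint32_t) (x >> 31);
  /* The logical right shift turns that into the bias 7 for negative X.  */
  uint32_t bias = signmask >> (32 - 3);

  return (int32_t) (((uint32_t) x + bias) & 7u) - (int32_t) bias;
}
#endif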
3702
3703 /* Expand signed division of OP0 by a power of two D in mode MODE.
3704 This routine is only called for positive values of D. */
3705
3706 static rtx
3707 expand_sdiv_pow2 (enum machine_mode mode, rtx op0, HOST_WIDE_INT d)
3708 {
3709 rtx temp, label;
3710 int logd;
3711
3712 logd = floor_log2 (d);
3713
3714 if (d == 2
3715 && BRANCH_COST (optimize_insn_for_speed_p (),
3716 false) >= 1)
3717 {
3718 temp = gen_reg_rtx (mode);
3719 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, 1);
3720 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3721 0, OPTAB_LIB_WIDEN);
3722 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3723 }
3724
3725 #ifdef HAVE_conditional_move
3726 if (BRANCH_COST (optimize_insn_for_speed_p (), false)
3727 >= 2)
3728 {
3729 rtx temp2;
3730
3731 /* ??? emit_conditional_move forces a stack adjustment via
3732 compare_from_rtx so, if the sequence is discarded, it will
3733 be lost. Do it now instead. */
3734 do_pending_stack_adjust ();
3735
3736 start_sequence ();
3737 temp2 = copy_to_mode_reg (mode, op0);
3738 temp = expand_binop (mode, add_optab, temp2, GEN_INT (d-1),
3739 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3740 temp = force_reg (mode, temp);
3741
3742 /* Construct "temp2 = (temp2 < 0) ? temp : temp2". */
3743 temp2 = emit_conditional_move (temp2, LT, temp2, const0_rtx,
3744 mode, temp, temp2, mode, 0);
3745 if (temp2)
3746 {
3747 rtx seq = get_insns ();
3748 end_sequence ();
3749 emit_insn (seq);
3750 return expand_shift (RSHIFT_EXPR, mode, temp2, logd, NULL_RTX, 0);
3751 }
3752 end_sequence ();
3753 }
3754 #endif
3755
3756 if (BRANCH_COST (optimize_insn_for_speed_p (),
3757 false) >= 2)
3758 {
3759 int ushift = GET_MODE_BITSIZE (mode) - logd;
3760
3761 temp = gen_reg_rtx (mode);
3762 temp = emit_store_flag (temp, LT, op0, const0_rtx, mode, 0, -1);
3763 if (shift_cost (optimize_insn_for_speed_p (), mode, ushift)
3764 > COSTS_N_INSNS (1))
3765 temp = expand_binop (mode, and_optab, temp, GEN_INT (d - 1),
3766 NULL_RTX, 0, OPTAB_LIB_WIDEN);
3767 else
3768 temp = expand_shift (RSHIFT_EXPR, mode, temp,
3769 ushift, NULL_RTX, 1);
3770 temp = expand_binop (mode, add_optab, temp, op0, NULL_RTX,
3771 0, OPTAB_LIB_WIDEN);
3772 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3773 }
3774
3775 label = gen_label_rtx ();
3776 temp = copy_to_mode_reg (mode, op0);
3777 do_cmp_and_jump (temp, const0_rtx, GE, mode, label);
3778 expand_inc (temp, GEN_INT (d - 1));
3779 emit_label (label);
3780 return expand_shift (RSHIFT_EXPR, mode, temp, logd, NULL_RTX, 0);
3781 }
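
/* Illustrative sketch, not part of GCC, of the store-flag variant above
   at a fixed 32-bit width: truncating signed division by 8 without a
   branch, biasing negative operands by 7 before the arithmetic shift.
   The hypothetical helper assumes the <stdint.h> fixed-width types and
   an arithmetic right shift of negative signed values.  */
#if 0
static int32_t
sdiv8 (int32_t x)
{
  /* 7 when X is negative, zero otherwise: the sign mask from the
     arithmetic shift, shifted down logically as in the code above.  */
  int32_t bias = (int32_t) (((uint32_t) (x >> 31)) >> (32 - 3));

  return (x + bias) >> 3;
}
#endif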
3782 \f
3783 /* Emit the code to divide OP0 by OP1, putting the result in TARGET
3784 if that is convenient, and returning where the result is.
3785 You may request either the quotient or the remainder as the result;
3786 specify REM_FLAG nonzero to get the remainder.
3787
3788 CODE is the expression code for which kind of division this is;
3789 it controls how rounding is done. MODE is the machine mode to use.
3790 UNSIGNEDP nonzero means do unsigned division. */
3791
3792 /* ??? For CEIL_MOD_EXPR, can compute incorrect remainder with ANDI
3793 and then correct it by or'ing in missing high bits
3794 if result of ANDI is nonzero.
3795 For ROUND_MOD_EXPR, can use ANDI and then sign-extend the result.
3796 This could optimize to a bfexts instruction.
3797 But C doesn't use these operations, so their optimizations are
3798 left for later. */
3799 /* ??? For modulo, we don't actually need the highpart of the first product,
3800 the low part will do nicely. And for small divisors, the second multiply
3801 can also be a low-part only multiply or even be completely left out.
3802 E.g. to calculate the remainder of a division by 3 with a 32 bit
3803 multiply, multiply with 0x55555556 and extract the upper two bits;
3804 the result is exact for inputs up to 0x1fffffff.
3805 The input range can be reduced by using cross-sum rules.
3806 For odd divisors >= 3, the following table gives right shift counts
3807 so that if a number is shifted by an integer multiple of the given
3808 amount, the remainder stays the same:
3809 2, 4, 3, 6, 10, 12, 4, 8, 18, 6, 11, 20, 18, 0, 5, 10, 12, 0, 12, 20,
3810 14, 12, 23, 21, 8, 0, 20, 18, 0, 0, 6, 12, 0, 22, 0, 18, 20, 30, 0, 0,
3811 0, 8, 0, 11, 12, 10, 36, 0, 30, 0, 0, 12, 0, 0, 0, 0, 44, 12, 24, 0,
3812 20, 0, 7, 14, 0, 18, 36, 0, 0, 46, 60, 0, 42, 0, 15, 24, 20, 0, 0, 33,
3813 0, 20, 0, 0, 18, 0, 60, 0, 0, 0, 0, 0, 40, 18, 0, 0, 12
3814
3815 Cross-sum rules for even numbers can be derived by leaving as many bits
3816 to the right alone as the divisor has zeros to the right.
3817 E.g. if x is an unsigned 32 bit number:
3818 (x mod 12) == (((x & 1023) + ((x >> 8) & ~3)) * 0x15555558 >> 2 * 3) >> 28
3819 */
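
/* Illustrative sketch, not part of GCC, of the division-by-3 remainder
   trick described just above, assuming the 32-bit uint32_t from
   <stdint.h>: the low 32 bits of x * 0x55555556 carry x mod 3 in their
   top two bits, exactly for all x up to 0x1fffffff.  */
#if 0
static uint32_t
rem3 (uint32_t x)
{
  /* 0x55555556 is 2^32 / 3 rounded up, so the low word of the product
     approximates the fractional part of x / 3 scaled by 2^32, and its
     top two bits are the remainder.  */
  return (x * 0x55555556u) >> 30;
}
#endif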
3820
3821 rtx
3822 expand_divmod (int rem_flag, enum tree_code code, enum machine_mode mode,
3823 rtx op0, rtx op1, rtx target, int unsignedp)
3824 {
3825 enum machine_mode compute_mode;
3826 rtx tquotient;
3827 rtx quotient = 0, remainder = 0;
3828 rtx last;
3829 int size;
3830 rtx insn;
3831 optab optab1, optab2;
3832 int op1_is_constant, op1_is_pow2 = 0;
3833 int max_cost, extra_cost;
3834 static HOST_WIDE_INT last_div_const = 0;
3835 static HOST_WIDE_INT ext_op1;
3836 bool speed = optimize_insn_for_speed_p ();
3837
3838 op1_is_constant = CONST_INT_P (op1);
3839 if (op1_is_constant)
3840 {
3841 ext_op1 = INTVAL (op1);
3842 if (unsignedp)
3843 ext_op1 &= GET_MODE_MASK (mode);
3844 op1_is_pow2 = ((EXACT_POWER_OF_2_OR_ZERO_P (ext_op1)
3845 || (! unsignedp && EXACT_POWER_OF_2_OR_ZERO_P (-ext_op1))));
3846 }
3847
3848 /*
3849 This is the structure of expand_divmod:
3850
3851 First comes code to fix up the operands so we can perform the operations
3852 correctly and efficiently.
3853
3854 Second comes a switch statement with code specific for each rounding mode.
3855 For some special operands this code emits all RTL for the desired
3856 operation, for other cases, it generates only a quotient and stores it in
3857 QUOTIENT. The case for trunc division/remainder might leave quotient = 0,
3858 to indicate that it has not done anything.
3859
3860 Last comes code that finishes the operation. If QUOTIENT is set and
3861 REM_FLAG is set, the remainder is computed as OP0 - QUOTIENT * OP1. If
3862 QUOTIENT is not set, it is computed using trunc rounding.
3863
3864 We try to generate special code for division and remainder when OP1 is a
3865 constant. If |OP1| = 2**n we can use shifts and some other fast
3866 operations. For other values of OP1, we compute a carefully selected
3867 fixed-point approximation m = 1/OP1, and generate code that multiplies OP0
3868 by m.
3869
3870 In all cases but EXACT_DIV_EXPR, this multiplication requires the upper
3871 half of the product. Different strategies for generating the product are
3872 implemented in expmed_mult_highpart.
3873
3874 If what we actually want is the remainder, we generate that by another
3875 by-constant multiplication and a subtraction. */
3876
3877 /* We shouldn't be called with OP1 == const1_rtx, but some of the
3878 code below will malfunction if we are, so check here and handle
3879 the special case if so. */
3880 if (op1 == const1_rtx)
3881 return rem_flag ? const0_rtx : op0;
3882
3883 /* When dividing by -1, we could get an overflow.
3884 negv_optab can handle overflows. */
3885 if (! unsignedp && op1 == constm1_rtx)
3886 {
3887 if (rem_flag)
3888 return const0_rtx;
3889 return expand_unop (mode, flag_trapv && GET_MODE_CLASS(mode) == MODE_INT
3890 ? negv_optab : neg_optab, op0, target, 0);
3891 }
3892
3893 if (target
3894 /* Don't use the function value register as a target
3895 since we have to read it as well as write it,
3896 and function-inlining gets confused by this. */
3897 && ((REG_P (target) && REG_FUNCTION_VALUE_P (target))
3898 /* Don't clobber an operand while doing a multi-step calculation. */
3899 || ((rem_flag || op1_is_constant)
3900 && (reg_mentioned_p (target, op0)
3901 || (MEM_P (op0) && MEM_P (target))))
3902 || reg_mentioned_p (target, op1)
3903 || (MEM_P (op1) && MEM_P (target))))
3904 target = 0;
3905
3906 /* Get the mode in which to perform this computation. Normally it will
3907 be MODE, but sometimes we can't do the desired operation in MODE.
3908 If so, pick a wider mode in which we can do the operation. Convert
3909 to that mode at the start to avoid repeated conversions.
3910
3911 First see what operations we need. These depend on the expression
3912 we are evaluating. (We assume that divxx3 insns exist under the
3913      same conditions as modxx3 insns do, and that these insns don't normally
3914 fail. If these assumptions are not correct, we may generate less
3915 efficient code in some cases.)
3916
3917 Then see if we find a mode in which we can open-code that operation
3918 (either a division, modulus, or shift). Finally, check for the smallest
3919 mode for which we can do the operation with a library call. */
3920
3921 /* We might want to refine this now that we have division-by-constant
3922 optimization. Since expmed_mult_highpart tries so many variants, it is
3923 not straightforward to generalize this. Maybe we should make an array
3924 of possible modes in init_expmed? Save this for GCC 2.7. */
3925
3926 optab1 = ((op1_is_pow2 && op1 != const0_rtx)
3927 ? (unsignedp ? lshr_optab : ashr_optab)
3928 : (unsignedp ? udiv_optab : sdiv_optab));
3929 optab2 = ((op1_is_pow2 && op1 != const0_rtx)
3930 ? optab1
3931 : (unsignedp ? udivmod_optab : sdivmod_optab));
3932
3933 for (compute_mode = mode; compute_mode != VOIDmode;
3934 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3935 if (optab_handler (optab1, compute_mode) != CODE_FOR_nothing
3936 || optab_handler (optab2, compute_mode) != CODE_FOR_nothing)
3937 break;
3938
3939 if (compute_mode == VOIDmode)
3940 for (compute_mode = mode; compute_mode != VOIDmode;
3941 compute_mode = GET_MODE_WIDER_MODE (compute_mode))
3942 if (optab_libfunc (optab1, compute_mode)
3943 || optab_libfunc (optab2, compute_mode))
3944 break;
3945
3946 /* If we still couldn't find a mode, use MODE, but expand_binop will
3947 probably die. */
3948 if (compute_mode == VOIDmode)
3949 compute_mode = mode;
3950
3951 if (target && GET_MODE (target) == compute_mode)
3952 tquotient = target;
3953 else
3954 tquotient = gen_reg_rtx (compute_mode);
3955
3956 size = GET_MODE_BITSIZE (compute_mode);
3957 #if 0
3958 /* It should be possible to restrict the precision to GET_MODE_BITSIZE
3959 (mode), and thereby get better code when OP1 is a constant. Do that
3960 later. It will require going over all usages of SIZE below. */
3961 size = GET_MODE_BITSIZE (mode);
3962 #endif
3963
3964 /* Only deduct something for a REM if the last divide done was
3965 for a different constant. Then set the constant of the last
3966 divide. */
3967 max_cost = (unsignedp
3968 ? udiv_cost (speed, compute_mode)
3969 : sdiv_cost (speed, compute_mode));
3970 if (rem_flag && ! (last_div_const != 0 && op1_is_constant
3971 && INTVAL (op1) == last_div_const))
3972 max_cost -= (mul_cost (speed, compute_mode)
3973 + add_cost (speed, compute_mode));
3974
3975 last_div_const = ! rem_flag && op1_is_constant ? INTVAL (op1) : 0;
3976
3977 /* Now convert to the best mode to use. */
3978 if (compute_mode != mode)
3979 {
3980 op0 = convert_modes (compute_mode, mode, op0, unsignedp);
3981 op1 = convert_modes (compute_mode, mode, op1, unsignedp);
3982
3983 /* convert_modes may have placed op1 into a register, so we
3984 must recompute the following. */
3985 op1_is_constant = CONST_INT_P (op1);
3986 op1_is_pow2 = (op1_is_constant
3987 && ((EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
3988 || (! unsignedp
3989 && EXACT_POWER_OF_2_OR_ZERO_P (-INTVAL (op1)))))) ;
3990 }
3991
3992 /* If one of the operands is a volatile MEM, copy it into a register. */
3993
3994 if (MEM_P (op0) && MEM_VOLATILE_P (op0))
3995 op0 = force_reg (compute_mode, op0);
3996 if (MEM_P (op1) && MEM_VOLATILE_P (op1))
3997 op1 = force_reg (compute_mode, op1);
3998
3999 /* If we need the remainder or if OP1 is constant, we need to
4000 put OP0 in a register in case it has any queued subexpressions. */
4001 if (rem_flag || op1_is_constant)
4002 op0 = force_reg (compute_mode, op0);
4003
4004 last = get_last_insn ();
4005
4006 /* Promote floor rounding to trunc rounding for unsigned operations. */
4007 if (unsignedp)
4008 {
4009 if (code == FLOOR_DIV_EXPR)
4010 code = TRUNC_DIV_EXPR;
4011 if (code == FLOOR_MOD_EXPR)
4012 code = TRUNC_MOD_EXPR;
4013 if (code == EXACT_DIV_EXPR && op1_is_pow2)
4014 code = TRUNC_DIV_EXPR;
4015 }
4016
4017 if (op1 != const0_rtx)
4018 switch (code)
4019 {
4020 case TRUNC_MOD_EXPR:
4021 case TRUNC_DIV_EXPR:
4022 if (op1_is_constant)
4023 {
4024 if (unsignedp)
4025 {
4026 unsigned HOST_WIDE_INT mh, ml;
4027 int pre_shift, post_shift;
4028 int dummy;
4029 unsigned HOST_WIDE_INT d = (INTVAL (op1)
4030 & GET_MODE_MASK (compute_mode));
4031
4032 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4033 {
4034 pre_shift = floor_log2 (d);
4035 if (rem_flag)
4036 {
4037 remainder
4038 = expand_binop (compute_mode, and_optab, op0,
4039 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4040 remainder, 1,
4041 OPTAB_LIB_WIDEN);
4042 if (remainder)
4043 return gen_lowpart (mode, remainder);
4044 }
4045 quotient = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4046 pre_shift, tquotient, 1);
4047 }
4048 else if (size <= HOST_BITS_PER_WIDE_INT)
4049 {
4050 if (d >= ((unsigned HOST_WIDE_INT) 1 << (size - 1)))
4051 {
4052 /* Most significant bit of divisor is set; emit an scc
4053 insn. */
4054 quotient = emit_store_flag_force (tquotient, GEU, op0, op1,
4055 compute_mode, 1, 1);
4056 }
4057 else
4058 {
4059 /* Find a suitable multiplier and right shift count
4060 instead of multiplying with D. */
4061
4062 mh = choose_multiplier (d, size, size,
4063 &ml, &post_shift, &dummy);
4064
4065 /* If the suggested multiplier is more than SIZE bits,
4066 we can do better for even divisors, using an
4067 initial right shift. */
4068 if (mh != 0 && (d & 1) == 0)
4069 {
4070 pre_shift = floor_log2 (d & -d);
4071 mh = choose_multiplier (d >> pre_shift, size,
4072 size - pre_shift,
4073 &ml, &post_shift, &dummy);
4074 gcc_assert (!mh);
4075 }
4076 else
4077 pre_shift = 0;
4078
4079 if (mh != 0)
4080 {
4081 rtx t1, t2, t3, t4;
4082
4083 if (post_shift - 1 >= BITS_PER_WORD)
4084 goto fail1;
4085
4086 extra_cost
4087 = (shift_cost (speed, compute_mode, post_shift - 1)
4088 + shift_cost (speed, compute_mode, 1)
4089 + 2 * add_cost (speed, compute_mode));
4090 t1 = expmed_mult_highpart (compute_mode, op0,
4091 GEN_INT (ml),
4092 NULL_RTX, 1,
4093 max_cost - extra_cost);
4094 if (t1 == 0)
4095 goto fail1;
4096 t2 = force_operand (gen_rtx_MINUS (compute_mode,
4097 op0, t1),
4098 NULL_RTX);
4099 t3 = expand_shift (RSHIFT_EXPR, compute_mode,
4100 t2, 1, NULL_RTX, 1);
4101 t4 = force_operand (gen_rtx_PLUS (compute_mode,
4102 t1, t3),
4103 NULL_RTX);
4104 quotient = expand_shift
4105 (RSHIFT_EXPR, compute_mode, t4,
4106 post_shift - 1, tquotient, 1);
4107 }
4108 else
4109 {
4110 rtx t1, t2;
4111
4112 if (pre_shift >= BITS_PER_WORD
4113 || post_shift >= BITS_PER_WORD)
4114 goto fail1;
4115
4116 t1 = expand_shift
4117 (RSHIFT_EXPR, compute_mode, op0,
4118 pre_shift, NULL_RTX, 1);
4119 extra_cost
4120 = (shift_cost (speed, compute_mode, pre_shift)
4121 + shift_cost (speed, compute_mode, post_shift));
4122 t2 = expmed_mult_highpart (compute_mode, t1,
4123 GEN_INT (ml),
4124 NULL_RTX, 1,
4125 max_cost - extra_cost);
4126 if (t2 == 0)
4127 goto fail1;
4128 quotient = expand_shift
4129 (RSHIFT_EXPR, compute_mode, t2,
4130 post_shift, tquotient, 1);
4131 }
4132 }
4133 }
4134 else /* Too wide mode to use tricky code */
4135 break;
4136
4137 insn = get_last_insn ();
4138 if (insn != last)
4139 set_dst_reg_note (insn, REG_EQUAL,
4140 gen_rtx_UDIV (compute_mode, op0, op1),
4141 quotient);
4142 }
4143 else /* TRUNC_DIV, signed */
4144 {
4145 unsigned HOST_WIDE_INT ml;
4146 int lgup, post_shift;
4147 rtx mlr;
4148 HOST_WIDE_INT d = INTVAL (op1);
4149 unsigned HOST_WIDE_INT abs_d;
4150
4151 /* Since d might be INT_MIN, we have to cast to
4152 unsigned HOST_WIDE_INT before negating to avoid
4153 undefined signed overflow. */
4154 abs_d = (d >= 0
4155 ? (unsigned HOST_WIDE_INT) d
4156 : - (unsigned HOST_WIDE_INT) d);
4157
4158 /* n rem d = n rem -d */
4159 if (rem_flag && d < 0)
4160 {
4161 d = abs_d;
4162 op1 = gen_int_mode (abs_d, compute_mode);
4163 }
4164
4165 if (d == 1)
4166 quotient = op0;
4167 else if (d == -1)
4168 quotient = expand_unop (compute_mode, neg_optab, op0,
4169 tquotient, 0);
4170 else if (HOST_BITS_PER_WIDE_INT >= size
4171 && abs_d == (unsigned HOST_WIDE_INT) 1 << (size - 1))
4172 {
4173 /* This case is not handled correctly below. */
4174 quotient = emit_store_flag (tquotient, EQ, op0, op1,
4175 compute_mode, 1, 1);
4176 if (quotient == 0)
4177 goto fail1;
4178 }
4179 else if (EXACT_POWER_OF_2_OR_ZERO_P (d)
4180 && (rem_flag
4181 ? smod_pow2_cheap (speed, compute_mode)
4182 : sdiv_pow2_cheap (speed, compute_mode))
4183 /* We assume that cheap metric is true if the
4184 optab has an expander for this mode. */
4185 && ((optab_handler ((rem_flag ? smod_optab
4186 : sdiv_optab),
4187 compute_mode)
4188 != CODE_FOR_nothing)
4189 || (optab_handler (sdivmod_optab,
4190 compute_mode)
4191 != CODE_FOR_nothing)))
4192 ;
4193 else if (EXACT_POWER_OF_2_OR_ZERO_P (abs_d))
4194 {
4195 if (rem_flag)
4196 {
4197 remainder = expand_smod_pow2 (compute_mode, op0, d);
4198 if (remainder)
4199 return gen_lowpart (mode, remainder);
4200 }
4201
4202 if (sdiv_pow2_cheap (speed, compute_mode)
4203 && ((optab_handler (sdiv_optab, compute_mode)
4204 != CODE_FOR_nothing)
4205 || (optab_handler (sdivmod_optab, compute_mode)
4206 != CODE_FOR_nothing)))
4207 quotient = expand_divmod (0, TRUNC_DIV_EXPR,
4208 compute_mode, op0,
4209 gen_int_mode (abs_d,
4210 compute_mode),
4211 NULL_RTX, 0);
4212 else
4213 quotient = expand_sdiv_pow2 (compute_mode, op0, abs_d);
4214
4215 /* We have computed OP0 / abs(OP1). If OP1 is negative,
4216 negate the quotient. */
4217 if (d < 0)
4218 {
4219 insn = get_last_insn ();
4220 if (insn != last
4221 && abs_d < ((unsigned HOST_WIDE_INT) 1
4222 << (HOST_BITS_PER_WIDE_INT - 1)))
4223 set_dst_reg_note (insn, REG_EQUAL,
4224 gen_rtx_DIV (compute_mode, op0,
4225 gen_int_mode
4226 (abs_d,
4227 compute_mode)),
4228 quotient);
4229
4230 quotient = expand_unop (compute_mode, neg_optab,
4231 quotient, quotient, 0);
4232 }
4233 }
4234 else if (size <= HOST_BITS_PER_WIDE_INT)
4235 {
4236 choose_multiplier (abs_d, size, size - 1,
4237 &ml, &post_shift, &lgup);
4238 if (ml < (unsigned HOST_WIDE_INT) 1 << (size - 1))
4239 {
4240 rtx t1, t2, t3;
4241
4242 if (post_shift >= BITS_PER_WORD
4243 || size - 1 >= BITS_PER_WORD)
4244 goto fail1;
4245
4246 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4247 + shift_cost (speed, compute_mode, size - 1)
4248 + add_cost (speed, compute_mode));
4249 t1 = expmed_mult_highpart (compute_mode, op0,
4250 GEN_INT (ml), NULL_RTX, 0,
4251 max_cost - extra_cost);
4252 if (t1 == 0)
4253 goto fail1;
4254 t2 = expand_shift
4255 (RSHIFT_EXPR, compute_mode, t1,
4256 post_shift, NULL_RTX, 0);
4257 t3 = expand_shift
4258 (RSHIFT_EXPR, compute_mode, op0,
4259 size - 1, NULL_RTX, 0);
4260 if (d < 0)
4261 quotient
4262 = force_operand (gen_rtx_MINUS (compute_mode,
4263 t3, t2),
4264 tquotient);
4265 else
4266 quotient
4267 = force_operand (gen_rtx_MINUS (compute_mode,
4268 t2, t3),
4269 tquotient);
4270 }
4271 else
4272 {
4273 rtx t1, t2, t3, t4;
4274
4275 if (post_shift >= BITS_PER_WORD
4276 || size - 1 >= BITS_PER_WORD)
4277 goto fail1;
4278
4279 ml |= (~(unsigned HOST_WIDE_INT) 0) << (size - 1);
4280 mlr = gen_int_mode (ml, compute_mode);
4281 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4282 + shift_cost (speed, compute_mode, size - 1)
4283 + 2 * add_cost (speed, compute_mode));
4284 t1 = expmed_mult_highpart (compute_mode, op0, mlr,
4285 NULL_RTX, 0,
4286 max_cost - extra_cost);
4287 if (t1 == 0)
4288 goto fail1;
4289 t2 = force_operand (gen_rtx_PLUS (compute_mode,
4290 t1, op0),
4291 NULL_RTX);
4292 t3 = expand_shift
4293 (RSHIFT_EXPR, compute_mode, t2,
4294 post_shift, NULL_RTX, 0);
4295 t4 = expand_shift
4296 (RSHIFT_EXPR, compute_mode, op0,
4297 size - 1, NULL_RTX, 0);
4298 if (d < 0)
4299 quotient
4300 = force_operand (gen_rtx_MINUS (compute_mode,
4301 t4, t3),
4302 tquotient);
4303 else
4304 quotient
4305 = force_operand (gen_rtx_MINUS (compute_mode,
4306 t3, t4),
4307 tquotient);
4308 }
4309 }
4310 else /* Too wide mode to use tricky code */
4311 break;
4312
4313 insn = get_last_insn ();
4314 if (insn != last)
4315 set_dst_reg_note (insn, REG_EQUAL,
4316 gen_rtx_DIV (compute_mode, op0, op1),
4317 quotient);
4318 }
4319 break;
4320 }
4321 fail1:
4322 delete_insns_since (last);
4323 break;
4324
4325 case FLOOR_DIV_EXPR:
4326 case FLOOR_MOD_EXPR:
4327 /* We will come here only for signed operations. */
4328 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4329 {
4330 unsigned HOST_WIDE_INT mh, ml;
4331 int pre_shift, lgup, post_shift;
4332 HOST_WIDE_INT d = INTVAL (op1);
4333
4334 if (d > 0)
4335 {
4336 /* We could just as easily deal with negative constants here,
4337 but it does not seem worth the trouble for GCC 2.6. */
4338 if (EXACT_POWER_OF_2_OR_ZERO_P (d))
4339 {
4340 pre_shift = floor_log2 (d);
4341 if (rem_flag)
4342 {
4343 remainder = expand_binop (compute_mode, and_optab, op0,
4344 GEN_INT (((HOST_WIDE_INT) 1 << pre_shift) - 1),
4345 remainder, 0, OPTAB_LIB_WIDEN);
4346 if (remainder)
4347 return gen_lowpart (mode, remainder);
4348 }
4349 quotient = expand_shift
4350 (RSHIFT_EXPR, compute_mode, op0,
4351 pre_shift, tquotient, 0);
4352 }
4353 else
4354 {
4355 rtx t1, t2, t3, t4;
4356
4357 mh = choose_multiplier (d, size, size - 1,
4358 &ml, &post_shift, &lgup);
4359 gcc_assert (!mh);
4360
4361 if (post_shift < BITS_PER_WORD
4362 && size - 1 < BITS_PER_WORD)
4363 {
4364 t1 = expand_shift
4365 (RSHIFT_EXPR, compute_mode, op0,
4366 size - 1, NULL_RTX, 0);
4367 t2 = expand_binop (compute_mode, xor_optab, op0, t1,
4368 NULL_RTX, 0, OPTAB_WIDEN);
4369 extra_cost = (shift_cost (speed, compute_mode, post_shift)
4370 + shift_cost (speed, compute_mode, size - 1)
4371 + 2 * add_cost (speed, compute_mode));
4372 t3 = expmed_mult_highpart (compute_mode, t2,
4373 GEN_INT (ml), NULL_RTX, 1,
4374 max_cost - extra_cost);
4375 if (t3 != 0)
4376 {
4377 t4 = expand_shift
4378 (RSHIFT_EXPR, compute_mode, t3,
4379 post_shift, NULL_RTX, 1);
4380 quotient = expand_binop (compute_mode, xor_optab,
4381 t4, t1, tquotient, 0,
4382 OPTAB_WIDEN);
4383 }
4384 }
4385 }
4386 }
4387 else
4388 {
4389 rtx nsign, t1, t2, t3, t4;
4390 t1 = force_operand (gen_rtx_PLUS (compute_mode,
4391 op0, constm1_rtx), NULL_RTX);
4392 t2 = expand_binop (compute_mode, ior_optab, op0, t1, NULL_RTX,
4393 0, OPTAB_WIDEN);
4394 nsign = expand_shift
4395 (RSHIFT_EXPR, compute_mode, t2,
4396 size - 1, NULL_RTX, 0);
4397 t3 = force_operand (gen_rtx_MINUS (compute_mode, t1, nsign),
4398 NULL_RTX);
4399 t4 = expand_divmod (0, TRUNC_DIV_EXPR, compute_mode, t3, op1,
4400 NULL_RTX, 0);
4401 if (t4)
4402 {
4403 rtx t5;
4404 t5 = expand_unop (compute_mode, one_cmpl_optab, nsign,
4405 NULL_RTX, 0);
4406 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4407 t4, t5),
4408 tquotient);
4409 }
4410 }
4411 }
4412
4413 if (quotient != 0)
4414 break;
4415 delete_insns_since (last);
4416
4417 /* Try using an instruction that produces both the quotient and
4418 remainder, using truncation. We can easily compensate the quotient
4419 or remainder to get floor rounding, once we have the remainder.
4420          Notice that we also compute the final remainder value here,
4421 and return the result right away. */
4422 if (target == 0 || GET_MODE (target) != compute_mode)
4423 target = gen_reg_rtx (compute_mode);
4424
4425 if (rem_flag)
4426 {
4427 remainder
4428 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4429 quotient = gen_reg_rtx (compute_mode);
4430 }
4431 else
4432 {
4433 quotient
4434 = REG_P (target) ? target : gen_reg_rtx (compute_mode);
4435 remainder = gen_reg_rtx (compute_mode);
4436 }
4437
4438 if (expand_twoval_binop (sdivmod_optab, op0, op1,
4439 quotient, remainder, 0))
4440 {
4441 /* This could be computed with a branch-less sequence.
4442 Save that for later. */
4443 rtx tem;
4444 rtx label = gen_label_rtx ();
4445 do_cmp_and_jump (remainder, const0_rtx, EQ, compute_mode, label);
4446 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4447 NULL_RTX, 0, OPTAB_WIDEN);
4448 do_cmp_and_jump (tem, const0_rtx, GE, compute_mode, label);
4449 expand_dec (quotient, const1_rtx);
4450 expand_inc (remainder, op1);
4451 emit_label (label);
4452 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4453 }
4454
4455 /* No luck with division elimination or divmod. Have to do it
4456 by conditionally adjusting op0 *and* the result. */
4457 {
4458 rtx label1, label2, label3, label4, label5;
4459 rtx adjusted_op0;
4460 rtx tem;
4461
4462 quotient = gen_reg_rtx (compute_mode);
4463 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4464 label1 = gen_label_rtx ();
4465 label2 = gen_label_rtx ();
4466 label3 = gen_label_rtx ();
4467 label4 = gen_label_rtx ();
4468 label5 = gen_label_rtx ();
4469 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4470 do_cmp_and_jump (adjusted_op0, const0_rtx, LT, compute_mode, label1);
4471 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4472 quotient, 0, OPTAB_LIB_WIDEN);
4473 if (tem != quotient)
4474 emit_move_insn (quotient, tem);
4475 emit_jump_insn (gen_jump (label5));
4476 emit_barrier ();
4477 emit_label (label1);
4478 expand_inc (adjusted_op0, const1_rtx);
4479 emit_jump_insn (gen_jump (label4));
4480 emit_barrier ();
4481 emit_label (label2);
4482 do_cmp_and_jump (adjusted_op0, const0_rtx, GT, compute_mode, label3);
4483 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4484 quotient, 0, OPTAB_LIB_WIDEN);
4485 if (tem != quotient)
4486 emit_move_insn (quotient, tem);
4487 emit_jump_insn (gen_jump (label5));
4488 emit_barrier ();
4489 emit_label (label3);
4490 expand_dec (adjusted_op0, const1_rtx);
4491 emit_label (label4);
4492 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4493 quotient, 0, OPTAB_LIB_WIDEN);
4494 if (tem != quotient)
4495 emit_move_insn (quotient, tem);
4496 expand_dec (quotient, const1_rtx);
4497 emit_label (label5);
4498 }
4499 break;
4500
4501 case CEIL_DIV_EXPR:
4502 case CEIL_MOD_EXPR:
4503 if (unsignedp)
4504 {
4505 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1)))
4506 {
4507 rtx t1, t2, t3;
4508 unsigned HOST_WIDE_INT d = INTVAL (op1);
4509 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4510 floor_log2 (d), tquotient, 1);
4511 t2 = expand_binop (compute_mode, and_optab, op0,
4512 GEN_INT (d - 1),
4513 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4514 t3 = gen_reg_rtx (compute_mode);
4515 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4516 compute_mode, 1, 1);
4517 if (t3 == 0)
4518 {
4519 rtx lab;
4520 lab = gen_label_rtx ();
4521 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4522 expand_inc (t1, const1_rtx);
4523 emit_label (lab);
4524 quotient = t1;
4525 }
4526 else
4527 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4528 t1, t3),
4529 tquotient);
4530 break;
4531 }
4532
4533 /* Try using an instruction that produces both the quotient and
4534 remainder, using truncation. We can easily compensate the
4535 quotient or remainder to get ceiling rounding, once we have the
4536              remainder.  Notice that we also compute the final remainder
4537 value here, and return the result right away. */
4538 if (target == 0 || GET_MODE (target) != compute_mode)
4539 target = gen_reg_rtx (compute_mode);
4540
4541 if (rem_flag)
4542 {
4543 remainder = (REG_P (target)
4544 ? target : gen_reg_rtx (compute_mode));
4545 quotient = gen_reg_rtx (compute_mode);
4546 }
4547 else
4548 {
4549 quotient = (REG_P (target)
4550 ? target : gen_reg_rtx (compute_mode));
4551 remainder = gen_reg_rtx (compute_mode);
4552 }
4553
4554 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient,
4555 remainder, 1))
4556 {
4557 /* This could be computed with a branch-less sequence.
4558 Save that for later. */
4559 rtx label = gen_label_rtx ();
4560 do_cmp_and_jump (remainder, const0_rtx, EQ,
4561 compute_mode, label);
4562 expand_inc (quotient, const1_rtx);
4563 expand_dec (remainder, op1);
4564 emit_label (label);
4565 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4566 }
4567
4568 /* No luck with division elimination or divmod. Have to do it
4569 by conditionally adjusting op0 *and* the result. */
4570 {
4571 rtx label1, label2;
4572 rtx adjusted_op0, tem;
4573
4574 quotient = gen_reg_rtx (compute_mode);
4575 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4576 label1 = gen_label_rtx ();
4577 label2 = gen_label_rtx ();
4578 do_cmp_and_jump (adjusted_op0, const0_rtx, NE,
4579 compute_mode, label1);
4580 emit_move_insn (quotient, const0_rtx);
4581 emit_jump_insn (gen_jump (label2));
4582 emit_barrier ();
4583 emit_label (label1);
4584 expand_dec (adjusted_op0, const1_rtx);
4585 tem = expand_binop (compute_mode, udiv_optab, adjusted_op0, op1,
4586 quotient, 1, OPTAB_LIB_WIDEN);
4587 if (tem != quotient)
4588 emit_move_insn (quotient, tem);
4589 expand_inc (quotient, const1_rtx);
4590 emit_label (label2);
4591 }
4592 }
4593 else /* signed */
4594 {
4595 if (op1_is_constant && EXACT_POWER_OF_2_OR_ZERO_P (INTVAL (op1))
4596 && INTVAL (op1) >= 0)
4597 {
4598 /* This is extremely similar to the code for the unsigned case
4599 above. For 2.7 we should merge these variants, but for
4600 2.6.1 I don't want to touch the code for unsigned since that
4601                 gets used in C.  The signed case will only be used by other
4602 languages (Ada). */
4603
4604 rtx t1, t2, t3;
4605 unsigned HOST_WIDE_INT d = INTVAL (op1);
4606 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4607 floor_log2 (d), tquotient, 0);
4608 t2 = expand_binop (compute_mode, and_optab, op0,
4609 GEN_INT (d - 1),
4610 NULL_RTX, 1, OPTAB_LIB_WIDEN);
4611 t3 = gen_reg_rtx (compute_mode);
4612 t3 = emit_store_flag (t3, NE, t2, const0_rtx,
4613 compute_mode, 1, 1);
4614 if (t3 == 0)
4615 {
4616 rtx lab;
4617 lab = gen_label_rtx ();
4618 do_cmp_and_jump (t2, const0_rtx, EQ, compute_mode, lab);
4619 expand_inc (t1, const1_rtx);
4620 emit_label (lab);
4621 quotient = t1;
4622 }
4623 else
4624 quotient = force_operand (gen_rtx_PLUS (compute_mode,
4625 t1, t3),
4626 tquotient);
4627 break;
4628 }
4629
4630 /* Try using an instruction that produces both the quotient and
4631 remainder, using truncation. We can easily compensate the
4632 quotient or remainder to get ceiling rounding, once we have the
4633              remainder.  Notice that we also compute the final remainder
4634 value here, and return the result right away. */
4635 if (target == 0 || GET_MODE (target) != compute_mode)
4636 target = gen_reg_rtx (compute_mode);
4637 if (rem_flag)
4638 {
4639              remainder = (REG_P (target)
4640 ? target : gen_reg_rtx (compute_mode));
4641 quotient = gen_reg_rtx (compute_mode);
4642 }
4643 else
4644 {
4645 quotient = (REG_P (target)
4646 ? target : gen_reg_rtx (compute_mode));
4647 remainder = gen_reg_rtx (compute_mode);
4648 }
4649
4650 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient,
4651 remainder, 0))
4652 {
4653 /* This could be computed with a branch-less sequence.
4654 Save that for later. */
4655 rtx tem;
4656 rtx label = gen_label_rtx ();
4657 do_cmp_and_jump (remainder, const0_rtx, EQ,
4658 compute_mode, label);
4659 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4660 NULL_RTX, 0, OPTAB_WIDEN);
4661 do_cmp_and_jump (tem, const0_rtx, LT, compute_mode, label);
4662 expand_inc (quotient, const1_rtx);
4663 expand_dec (remainder, op1);
4664 emit_label (label);
4665 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4666 }
4667
4668 /* No luck with division elimination or divmod. Have to do it
4669 by conditionally adjusting op0 *and* the result. */
4670 {
4671 rtx label1, label2, label3, label4, label5;
4672 rtx adjusted_op0;
4673 rtx tem;
4674
4675 quotient = gen_reg_rtx (compute_mode);
4676 adjusted_op0 = copy_to_mode_reg (compute_mode, op0);
4677 label1 = gen_label_rtx ();
4678 label2 = gen_label_rtx ();
4679 label3 = gen_label_rtx ();
4680 label4 = gen_label_rtx ();
4681 label5 = gen_label_rtx ();
4682 do_cmp_and_jump (op1, const0_rtx, LT, compute_mode, label2);
4683 do_cmp_and_jump (adjusted_op0, const0_rtx, GT,
4684 compute_mode, label1);
4685 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4686 quotient, 0, OPTAB_LIB_WIDEN);
4687 if (tem != quotient)
4688 emit_move_insn (quotient, tem);
4689 emit_jump_insn (gen_jump (label5));
4690 emit_barrier ();
4691 emit_label (label1);
4692 expand_dec (adjusted_op0, const1_rtx);
4693 emit_jump_insn (gen_jump (label4));
4694 emit_barrier ();
4695 emit_label (label2);
4696 do_cmp_and_jump (adjusted_op0, const0_rtx, LT,
4697 compute_mode, label3);
4698 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4699 quotient, 0, OPTAB_LIB_WIDEN);
4700 if (tem != quotient)
4701 emit_move_insn (quotient, tem);
4702 emit_jump_insn (gen_jump (label5));
4703 emit_barrier ();
4704 emit_label (label3);
4705 expand_inc (adjusted_op0, const1_rtx);
4706 emit_label (label4);
4707 tem = expand_binop (compute_mode, sdiv_optab, adjusted_op0, op1,
4708 quotient, 0, OPTAB_LIB_WIDEN);
4709 if (tem != quotient)
4710 emit_move_insn (quotient, tem);
4711 expand_inc (quotient, const1_rtx);
4712 emit_label (label5);
4713 }
4714 }
4715 break;
4716
4717 case EXACT_DIV_EXPR:
4718 if (op1_is_constant && HOST_BITS_PER_WIDE_INT >= size)
4719 {
4720 HOST_WIDE_INT d = INTVAL (op1);
4721 unsigned HOST_WIDE_INT ml;
4722 int pre_shift;
4723 rtx t1;
4724
4725 pre_shift = floor_log2 (d & -d);
4726 ml = invert_mod2n (d >> pre_shift, size);
4727 t1 = expand_shift (RSHIFT_EXPR, compute_mode, op0,
4728 pre_shift, NULL_RTX, unsignedp);
4729 quotient = expand_mult (compute_mode, t1,
4730 gen_int_mode (ml, compute_mode),
4731 NULL_RTX, 1);
4732
4733 insn = get_last_insn ();
4734 set_dst_reg_note (insn, REG_EQUAL,
4735 gen_rtx_fmt_ee (unsignedp ? UDIV : DIV,
4736 compute_mode, op0, op1),
4737 quotient);
4738 }
4739 break;
4740
4741 case ROUND_DIV_EXPR:
4742 case ROUND_MOD_EXPR:
4743 if (unsignedp)
4744 {
4745 rtx tem;
4746 rtx label;
4747 label = gen_label_rtx ();
4748 quotient = gen_reg_rtx (compute_mode);
4749 remainder = gen_reg_rtx (compute_mode);
4750 if (expand_twoval_binop (udivmod_optab, op0, op1, quotient, remainder, 1) == 0)
4751 {
4752 rtx tem;
4753 quotient = expand_binop (compute_mode, udiv_optab, op0, op1,
4754 quotient, 1, OPTAB_LIB_WIDEN);
4755 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 1);
4756 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4757 remainder, 1, OPTAB_LIB_WIDEN);
4758 }
4759 tem = plus_constant (compute_mode, op1, -1);
4760 tem = expand_shift (RSHIFT_EXPR, compute_mode, tem, 1, NULL_RTX, 1);
4761 do_cmp_and_jump (remainder, tem, LEU, compute_mode, label);
4762 expand_inc (quotient, const1_rtx);
4763 expand_dec (remainder, op1);
4764 emit_label (label);
4765 }
4766 else
4767 {
4768 rtx abs_rem, abs_op1, tem, mask;
4769 rtx label;
4770 label = gen_label_rtx ();
4771 quotient = gen_reg_rtx (compute_mode);
4772 remainder = gen_reg_rtx (compute_mode);
4773 if (expand_twoval_binop (sdivmod_optab, op0, op1, quotient, remainder, 0) == 0)
4774 {
4775 rtx tem;
4776 quotient = expand_binop (compute_mode, sdiv_optab, op0, op1,
4777 quotient, 0, OPTAB_LIB_WIDEN);
4778 tem = expand_mult (compute_mode, quotient, op1, NULL_RTX, 0);
4779 remainder = expand_binop (compute_mode, sub_optab, op0, tem,
4780 remainder, 0, OPTAB_LIB_WIDEN);
4781 }
4782 abs_rem = expand_abs (compute_mode, remainder, NULL_RTX, 1, 0);
4783 abs_op1 = expand_abs (compute_mode, op1, NULL_RTX, 1, 0);
4784 tem = expand_shift (LSHIFT_EXPR, compute_mode, abs_rem,
4785 1, NULL_RTX, 1);
4786 do_cmp_and_jump (tem, abs_op1, LTU, compute_mode, label);
4787 tem = expand_binop (compute_mode, xor_optab, op0, op1,
4788 NULL_RTX, 0, OPTAB_WIDEN);
4789 mask = expand_shift (RSHIFT_EXPR, compute_mode, tem,
4790 size - 1, NULL_RTX, 0);
4791 tem = expand_binop (compute_mode, xor_optab, mask, const1_rtx,
4792 NULL_RTX, 0, OPTAB_WIDEN);
4793 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4794 NULL_RTX, 0, OPTAB_WIDEN);
4795 expand_inc (quotient, tem);
4796 tem = expand_binop (compute_mode, xor_optab, mask, op1,
4797 NULL_RTX, 0, OPTAB_WIDEN);
4798 tem = expand_binop (compute_mode, sub_optab, tem, mask,
4799 NULL_RTX, 0, OPTAB_WIDEN);
4800 expand_dec (remainder, tem);
4801 emit_label (label);
4802 }
4803 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4804
4805 default:
4806 gcc_unreachable ();
4807 }
4808
4809 if (quotient == 0)
4810 {
4811 if (target && GET_MODE (target) != compute_mode)
4812 target = 0;
4813
4814 if (rem_flag)
4815 {
4816 /* Try to produce the remainder without producing the quotient.
4817 If we seem to have a divmod pattern that does not require widening,
4818 don't try widening here. We should really have a WIDEN argument
4819 to expand_twoval_binop, since what we'd really like to do here is
4820 1) try a mod insn in compute_mode
4821 2) try a divmod insn in compute_mode
4822 3) try a div insn in compute_mode and multiply-subtract to get
4823 remainder
4824 4) try the same things with widening allowed. */
4825 remainder
4826 = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4827 op0, op1, target,
4828 unsignedp,
4829 ((optab_handler (optab2, compute_mode)
4830 != CODE_FOR_nothing)
4831 ? OPTAB_DIRECT : OPTAB_WIDEN));
4832 if (remainder == 0)
4833 {
4834 /* No luck there. Can we do remainder and divide at once
4835 without a library call? */
4836 remainder = gen_reg_rtx (compute_mode);
4837 if (! expand_twoval_binop ((unsignedp
4838 ? udivmod_optab
4839 : sdivmod_optab),
4840 op0, op1,
4841 NULL_RTX, remainder, unsignedp))
4842 remainder = 0;
4843 }
4844
4845 if (remainder)
4846 return gen_lowpart (mode, remainder);
4847 }
4848
4849 /* Produce the quotient. Try a quotient insn, but not a library call.
4850 If we have a divmod in this mode, use it in preference to widening
4851 the div (for this test we assume it will not fail). Note that optab2
4852 is set to the one of the two optabs that the call below will use. */
4853 quotient
4854 = sign_expand_binop (compute_mode, udiv_optab, sdiv_optab,
4855 op0, op1, rem_flag ? NULL_RTX : target,
4856 unsignedp,
4857 ((optab_handler (optab2, compute_mode)
4858 != CODE_FOR_nothing)
4859 ? OPTAB_DIRECT : OPTAB_WIDEN));
4860
4861 if (quotient == 0)
4862 {
4863 /* No luck there. Try a quotient-and-remainder insn,
4864 keeping the quotient alone. */
4865 quotient = gen_reg_rtx (compute_mode);
4866 if (! expand_twoval_binop (unsignedp ? udivmod_optab : sdivmod_optab,
4867 op0, op1,
4868 quotient, NULL_RTX, unsignedp))
4869 {
4870 quotient = 0;
4871 if (! rem_flag)
4872 /* Still no luck. If we are not computing the remainder,
4873 use a library call for the quotient. */
4874 quotient = sign_expand_binop (compute_mode,
4875 udiv_optab, sdiv_optab,
4876 op0, op1, target,
4877 unsignedp, OPTAB_LIB_WIDEN);
4878 }
4879 }
4880 }
4881
4882 if (rem_flag)
4883 {
4884 if (target && GET_MODE (target) != compute_mode)
4885 target = 0;
4886
4887 if (quotient == 0)
4888 {
4889 /* No divide instruction either. Use library for remainder. */
4890 remainder = sign_expand_binop (compute_mode, umod_optab, smod_optab,
4891 op0, op1, target,
4892 unsignedp, OPTAB_LIB_WIDEN);
4893 /* No remainder function. Try a quotient-and-remainder
4894 function, keeping the remainder. */
4895 if (!remainder)
4896 {
4897 remainder = gen_reg_rtx (compute_mode);
4898 if (!expand_twoval_binop_libfunc
4899 (unsignedp ? udivmod_optab : sdivmod_optab,
4900 op0, op1,
4901 NULL_RTX, remainder,
4902 unsignedp ? UMOD : MOD))
4903 remainder = NULL_RTX;
4904 }
4905 }
4906 else
4907 {
4908 /* We divided. Now finish doing X - Y * (X / Y). */
4909 remainder = expand_mult (compute_mode, quotient, op1,
4910 NULL_RTX, unsignedp);
4911 remainder = expand_binop (compute_mode, sub_optab, op0,
4912 remainder, target, unsignedp,
4913 OPTAB_LIB_WIDEN);
4914 }
4915 }
4916
4917 return gen_lowpart (mode, rem_flag ? remainder : quotient);
4918 }
4919 \f
4920 /* Return a tree node with data type TYPE, describing the value of X.
4921    Usually this is a VAR_DECL, if there is no obvious better choice.
4922    X may be an expression; however, we only support those expressions
4923    generated by loop.c.  */
4924
4925 tree
4926 make_tree (tree type, rtx x)
4927 {
4928 tree t;
4929
4930 switch (GET_CODE (x))
4931 {
4932 case CONST_INT:
4933 {
4934 HOST_WIDE_INT hi = 0;
4935
4936 if (INTVAL (x) < 0
4937 && !(TYPE_UNSIGNED (type)
4938 && (GET_MODE_BITSIZE (TYPE_MODE (type))
4939 < HOST_BITS_PER_WIDE_INT)))
4940 hi = -1;
4941
4942 t = build_int_cst_wide (type, INTVAL (x), hi);
4943
4944 return t;
4945 }
4946
4947 case CONST_DOUBLE:
4948 if (GET_MODE (x) == VOIDmode)
4949 t = build_int_cst_wide (type,
4950 CONST_DOUBLE_LOW (x), CONST_DOUBLE_HIGH (x));
4951 else
4952 {
4953 REAL_VALUE_TYPE d;
4954
4955 REAL_VALUE_FROM_CONST_DOUBLE (d, x);
4956 t = build_real (type, d);
4957 }
4958
4959 return t;
4960
4961 case CONST_VECTOR:
4962 {
4963 int units = CONST_VECTOR_NUNITS (x);
4964 tree itype = TREE_TYPE (type);
4965 tree *elts;
4966 int i;
4967
4968 /* Build a tree with vector elements. */
4969 elts = XALLOCAVEC (tree, units);
4970 for (i = units - 1; i >= 0; --i)
4971 {
4972 rtx elt = CONST_VECTOR_ELT (x, i);
4973 elts[i] = make_tree (itype, elt);
4974 }
4975
4976 return build_vector (type, elts);
4977 }
4978
4979 case PLUS:
4980 return fold_build2 (PLUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4981 make_tree (type, XEXP (x, 1)));
4982
4983 case MINUS:
4984 return fold_build2 (MINUS_EXPR, type, make_tree (type, XEXP (x, 0)),
4985 make_tree (type, XEXP (x, 1)));
4986
4987 case NEG:
4988 return fold_build1 (NEGATE_EXPR, type, make_tree (type, XEXP (x, 0)));
4989
4990 case MULT:
4991 return fold_build2 (MULT_EXPR, type, make_tree (type, XEXP (x, 0)),
4992 make_tree (type, XEXP (x, 1)));
4993
4994 case ASHIFT:
4995 return fold_build2 (LSHIFT_EXPR, type, make_tree (type, XEXP (x, 0)),
4996 make_tree (type, XEXP (x, 1)));
4997
4998 case LSHIFTRT:
4999 t = unsigned_type_for (type);
5000 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5001 make_tree (t, XEXP (x, 0)),
5002 make_tree (type, XEXP (x, 1))));
5003
5004 case ASHIFTRT:
5005 t = signed_type_for (type);
5006 return fold_convert (type, build2 (RSHIFT_EXPR, t,
5007 make_tree (t, XEXP (x, 0)),
5008 make_tree (type, XEXP (x, 1))));
5009
5010 case DIV:
5011 if (TREE_CODE (type) != REAL_TYPE)
5012 t = signed_type_for (type);
5013 else
5014 t = type;
5015
5016 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5017 make_tree (t, XEXP (x, 0)),
5018 make_tree (t, XEXP (x, 1))));
5019 case UDIV:
5020 t = unsigned_type_for (type);
5021 return fold_convert (type, build2 (TRUNC_DIV_EXPR, t,
5022 make_tree (t, XEXP (x, 0)),
5023 make_tree (t, XEXP (x, 1))));
5024
5025 case SIGN_EXTEND:
5026 case ZERO_EXTEND:
5027 t = lang_hooks.types.type_for_mode (GET_MODE (XEXP (x, 0)),
5028 GET_CODE (x) == ZERO_EXTEND);
5029 return fold_convert (type, make_tree (t, XEXP (x, 0)));
5030
5031 case CONST:
5032 return make_tree (type, XEXP (x, 0));
5033
5034 case SYMBOL_REF:
5035 t = SYMBOL_REF_DECL (x);
5036 if (t)
5037 return fold_convert (type, build_fold_addr_expr (t));
5038 /* else fall through. */
5039
5040 default:
5041 t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
5042
5043 /* If TYPE is a POINTER_TYPE, we might need to convert X from
5044 address mode to pointer mode. */
5045 if (POINTER_TYPE_P (type))
5046 x = convert_memory_address_addr_space
5047 (TYPE_MODE (type), x, TYPE_ADDR_SPACE (TREE_TYPE (type)));
5048
5049 /* Note that we do *not* use SET_DECL_RTL here, because we do not
5050 want set_decl_rtl to go adjusting REG_ATTRS for this temporary. */
5051 t->decl_with_rtl.rtl = x;
5052
5053 return t;
5054 }
5055 }
5056 \f
5057 /* Compute the logical-and of OP0 and OP1, storing it in TARGET
5058 and returning TARGET.
5059
5060 If TARGET is 0, a pseudo-register or constant is returned. */
5061
5062 rtx
5063 expand_and (enum machine_mode mode, rtx op0, rtx op1, rtx target)
5064 {
5065 rtx tem = 0;
5066
5067 if (GET_MODE (op0) == VOIDmode && GET_MODE (op1) == VOIDmode)
5068 tem = simplify_binary_operation (AND, mode, op0, op1);
5069 if (tem == 0)
5070 tem = expand_binop (mode, and_optab, op0, op1, target, 0, OPTAB_LIB_WIDEN);
5071
5072 if (target == 0)
5073 target = tem;
5074 else if (tem != target)
5075 emit_move_insn (target, tem);
5076 return target;
5077 }
5078
5079 /* Helper function for emit_store_flag. */
5080 static rtx
5081 emit_cstore (rtx target, enum insn_code icode, enum rtx_code code,
5082 enum machine_mode mode, enum machine_mode compare_mode,
5083 int unsignedp, rtx x, rtx y, int normalizep,
5084 enum machine_mode target_mode)
5085 {
5086 struct expand_operand ops[4];
5087 rtx op0, last, comparison, subtarget;
5088 enum machine_mode result_mode = insn_data[(int) icode].operand[0].mode;
5089
5090 last = get_last_insn ();
5091 x = prepare_operand (icode, x, 2, mode, compare_mode, unsignedp);
5092 y = prepare_operand (icode, y, 3, mode, compare_mode, unsignedp);
5093 if (!x || !y)
5094 {
5095 delete_insns_since (last);
5096 return NULL_RTX;
5097 }
5098
5099 if (target_mode == VOIDmode)
5100 target_mode = result_mode;
5101 if (!target)
5102 target = gen_reg_rtx (target_mode);
5103
5104 comparison = gen_rtx_fmt_ee (code, result_mode, x, y);
5105
5106 create_output_operand (&ops[0], optimize ? NULL_RTX : target, result_mode);
5107 create_fixed_operand (&ops[1], comparison);
5108 create_fixed_operand (&ops[2], x);
5109 create_fixed_operand (&ops[3], y);
5110 if (!maybe_expand_insn (icode, 4, ops))
5111 {
5112 delete_insns_since (last);
5113 return NULL_RTX;
5114 }
5115 subtarget = ops[0].value;
5116
5117 /* If we are converting to a wider mode, first convert to
5118 TARGET_MODE, then normalize. This produces better combining
5119 opportunities on machines that have a SIGN_EXTRACT when we are
5120 testing a single bit. This mostly benefits the 68k.
5121
5122 If STORE_FLAG_VALUE does not have the sign bit set when
5123 interpreted in MODE, we can do this conversion as unsigned, which
5124 is usually more efficient. */
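/* Illustrative case: on a machine where STORE_FLAG_VALUE is 1 and the
   cstore pattern produces a QImode result, the sign bit is known
   clear in QImode, so the widening convert_move below can zero-extend
   rather than sign-extend.  */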
5125 if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (result_mode))
5126 {
5127 convert_move (target, subtarget,
5128 val_signbit_known_clear_p (result_mode,
5129 STORE_FLAG_VALUE));
5130 op0 = target;
5131 result_mode = target_mode;
5132 }
5133 else
5134 op0 = subtarget;
5135
5136 /* If we want to keep subexpressions around, don't reuse our last
5137 target. */
5138 if (optimize)
5139 subtarget = 0;
5140
5141 /* Now normalize to the proper value in MODE. Sometimes we don't
5142 have to do anything. */
5143 if (normalizep == 0 || normalizep == STORE_FLAG_VALUE)
5144 ;
5145 /* STORE_FLAG_VALUE might be the most negative number, so write
5146 the comparison this way to avoid a compile-time warning. */
5147 else if (- normalizep == STORE_FLAG_VALUE)
5148 op0 = expand_unop (result_mode, neg_optab, op0, subtarget, 0);
5149
5150 /* We don't want to use STORE_FLAG_VALUE < 0 below since this makes
5151 it hard to use a value of just the sign bit due to ANSI integer
5152 constant typing rules. */
5153 else if (val_signbit_known_set_p (result_mode, STORE_FLAG_VALUE))
5154 op0 = expand_shift (RSHIFT_EXPR, result_mode, op0,
5155 GET_MODE_BITSIZE (result_mode) - 1, subtarget,
5156 normalizep == 1);
5157 else
5158 {
5159 gcc_assert (STORE_FLAG_VALUE & 1);
5160
5161 op0 = expand_and (result_mode, op0, const1_rtx, subtarget);
5162 if (normalizep == -1)
5163 op0 = expand_unop (result_mode, neg_optab, op0, op0, 0);
5164 }
5165
5166 /* If we were converting to a smaller mode, do the conversion now. */
5167 if (target_mode != result_mode)
5168 {
5169 convert_move (target, op0, 0);
5170 return target;
5171 }
5172 else
5173 return op0;
5174 }
5175
5176
5177 /* A subroutine of emit_store_flag only including "tricks" that do not
5178 need a recursive call. These are kept separate to avoid infinite
5179 loops. */
5180
5181 static rtx
5182 emit_store_flag_1 (rtx target, enum rtx_code code, rtx op0, rtx op1,
5183 enum machine_mode mode, int unsignedp, int normalizep,
5184 enum machine_mode target_mode)
5185 {
5186 rtx subtarget;
5187 enum insn_code icode;
5188 enum machine_mode compare_mode;
5189 enum mode_class mclass;
5190 enum rtx_code scode;
5191 rtx tem;
5192
5193 if (unsignedp)
5194 code = unsigned_condition (code);
5195 scode = swap_condition (code);
5196
5197 /* If one operand is constant, make it the second one. Only do this
5198 if the other operand is not constant as well. */
5199
5200 if (swap_commutative_operands_p (op0, op1))
5201 {
5202 tem = op0;
5203 op0 = op1;
5204 op1 = tem;
5205 code = swap_condition (code);
5206 }
5207
5208 if (mode == VOIDmode)
5209 mode = GET_MODE (op0);
5210
5211 /* For some comparisons with 1 and -1, we can convert this to
5212 comparisons with zero. This will often produce more opportunities for
5213 store-flag insns. */
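/* A worked instance of the rewrites below (illustrative values):
   "x < 1" becomes "x <= 0", "x > -1" becomes "x >= 0", and in the
   unsigned cases "x >= 1" (GEU) becomes "x != 0" while "x < 1" (LTU)
   becomes "x == 0".  */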
5214
5215 switch (code)
5216 {
5217 case LT:
5218 if (op1 == const1_rtx)
5219 op1 = const0_rtx, code = LE;
5220 break;
5221 case LE:
5222 if (op1 == constm1_rtx)
5223 op1 = const0_rtx, code = LT;
5224 break;
5225 case GE:
5226 if (op1 == const1_rtx)
5227 op1 = const0_rtx, code = GT;
5228 break;
5229 case GT:
5230 if (op1 == constm1_rtx)
5231 op1 = const0_rtx, code = GE;
5232 break;
5233 case GEU:
5234 if (op1 == const1_rtx)
5235 op1 = const0_rtx, code = NE;
5236 break;
5237 case LTU:
5238 if (op1 == const1_rtx)
5239 op1 = const0_rtx, code = EQ;
5240 break;
5241 default:
5242 break;
5243 }
5244
5245 /* If we are comparing a double-word integer with zero or -1, we can
5246 convert the comparison into one involving a single word. */
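/* Sketch of the word-level rewrites used below, assuming a 32-bit
   word: a DImode "x == 0" becomes "(x_low | x_high) == 0",
   "x == -1" becomes "(x_low & x_high) == -1", and "x < 0" only
   needs the sign bit of x_high.  */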
5247 if (GET_MODE_BITSIZE (mode) == BITS_PER_WORD * 2
5248 && GET_MODE_CLASS (mode) == MODE_INT
5249 && (!MEM_P (op0) || ! MEM_VOLATILE_P (op0)))
5250 {
5251 if ((code == EQ || code == NE)
5252 && (op1 == const0_rtx || op1 == constm1_rtx))
5253 {
5254 rtx op00, op01;
5255
5256 /* Do a logical OR or AND of the two words and compare the
5257 result. */
5258 op00 = simplify_gen_subreg (word_mode, op0, mode, 0);
5259 op01 = simplify_gen_subreg (word_mode, op0, mode, UNITS_PER_WORD);
5260 tem = expand_binop (word_mode,
5261 op1 == const0_rtx ? ior_optab : and_optab,
5262 op00, op01, NULL_RTX, unsignedp,
5263 OPTAB_DIRECT);
5264
5265 if (tem != 0)
5266 tem = emit_store_flag (NULL_RTX, code, tem, op1, word_mode,
5267 unsignedp, normalizep);
5268 }
5269 else if ((code == LT || code == GE) && op1 == const0_rtx)
5270 {
5271 rtx op0h;
5272
5273 /* If testing the sign bit, we can just test the high word. */
5274 op0h = simplify_gen_subreg (word_mode, op0, mode,
5275 subreg_highpart_offset (word_mode,
5276 mode));
5277 tem = emit_store_flag (NULL_RTX, code, op0h, op1, word_mode,
5278 unsignedp, normalizep);
5279 }
5280 else
5281 tem = NULL_RTX;
5282
5283 if (tem)
5284 {
5285 if (target_mode == VOIDmode || GET_MODE (tem) == target_mode)
5286 return tem;
5287 if (!target)
5288 target = gen_reg_rtx (target_mode);
5289
5290 convert_move (target, tem,
5291 !val_signbit_known_set_p (word_mode,
5292 (normalizep ? normalizep
5293 : STORE_FLAG_VALUE)));
5294 return target;
5295 }
5296 }
5297
5298 /* If this is A < 0 or A >= 0, we can do this by taking the one's
5299 complement of A (for GE) and shifting the sign bit to the low bit. */
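/* In C terms the idea is roughly (sketch, 32-bit int assumed):
     (A < 0)  is  ((unsigned) A) >> 31       -- logical shift, 0/1
     (A >= 0) is  ((unsigned) ~A) >> 31      -- complement first
   and an arithmetic shift yields the -1/0 form instead.  */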
5300 if (op1 == const0_rtx && (code == LT || code == GE)
5301 && GET_MODE_CLASS (mode) == MODE_INT
5302 && (normalizep || STORE_FLAG_VALUE == 1
5303 || val_signbit_p (mode, STORE_FLAG_VALUE)))
5304 {
5305 subtarget = target;
5306
5307 if (!target)
5308 target_mode = mode;
5309
5310 /* If the result is to be wider than OP0, it is best to convert it
5311 first. If it is to be narrower, it is *incorrect* to convert it
5312 first. */
5313 else if (GET_MODE_SIZE (target_mode) > GET_MODE_SIZE (mode))
5314 {
5315 op0 = convert_modes (target_mode, mode, op0, 0);
5316 mode = target_mode;
5317 }
5318
5319 if (target_mode != mode)
5320 subtarget = 0;
5321
5322 if (code == GE)
5323 op0 = expand_unop (mode, one_cmpl_optab, op0,
5324 ((STORE_FLAG_VALUE == 1 || normalizep)
5325 ? 0 : subtarget), 0);
5326
5327 if (STORE_FLAG_VALUE == 1 || normalizep)
5328 /* If we are supposed to produce a 0/1 value, we want to do
5329 a logical shift from the sign bit to the low-order bit; for
5330 a -1/0 value, we do an arithmetic shift. */
5331 op0 = expand_shift (RSHIFT_EXPR, mode, op0,
5332 GET_MODE_BITSIZE (mode) - 1,
5333 subtarget, normalizep != -1);
5334
5335 if (mode != target_mode)
5336 op0 = convert_modes (target_mode, mode, op0, 0);
5337
5338 return op0;
5339 }
5340
5341 mclass = GET_MODE_CLASS (mode);
5342 for (compare_mode = mode; compare_mode != VOIDmode;
5343 compare_mode = GET_MODE_WIDER_MODE (compare_mode))
5344 {
5345 enum machine_mode optab_mode = mclass == MODE_CC ? CCmode : compare_mode;
5346 icode = optab_handler (cstore_optab, optab_mode);
5347 if (icode != CODE_FOR_nothing)
5348 {
5349 do_pending_stack_adjust ();
5350 tem = emit_cstore (target, icode, code, mode, compare_mode,
5351 unsignedp, op0, op1, normalizep, target_mode);
5352 if (tem)
5353 return tem;
5354
5355 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5356 {
5357 tem = emit_cstore (target, icode, scode, mode, compare_mode,
5358 unsignedp, op1, op0, normalizep, target_mode);
5359 if (tem)
5360 return tem;
5361 }
5362 break;
5363 }
5364 }
5365
5366 return 0;
5367 }
5368
5369 /* Emit a store-flags instruction for comparison CODE on OP0 and OP1
5370 and store the result in TARGET. Normally return TARGET.
5371 Return 0 if that cannot be done.
5372
5373 MODE is the mode to use for OP0 and OP1 should they be CONST_INTs. If
5374 it is VOIDmode, they cannot both be CONST_INT.
5375
5376 UNSIGNEDP is for the case where we have to widen the operands
5377 to perform the operation. It says to use zero-extension.
5378
5379 NORMALIZEP is 1 if we should convert the result to be either zero
5380 or one. NORMALIZEP is -1 if we should convert the result to be
5381 either zero or -1. If NORMALIZEP is zero, the result will be left
5382 "raw" out of the scc insn. */
5383
5384 rtx
5385 emit_store_flag (rtx target, enum rtx_code code, rtx op0, rtx op1,
5386 enum machine_mode mode, int unsignedp, int normalizep)
5387 {
5388 enum machine_mode target_mode = target ? GET_MODE (target) : VOIDmode;
5389 enum rtx_code rcode;
5390 rtx subtarget;
5391 rtx tem, last, trueval;
5392
5393 tem = emit_store_flag_1 (target, code, op0, op1, mode, unsignedp, normalizep,
5394 target_mode);
5395 if (tem)
5396 return tem;
5397
5398 /* If we reached here, we can't do this with a scc insn; however, there
5399 are some comparisons that can be done in other ways. Don't do any
5400 of these cases if branches are very cheap. */
5401 if (BRANCH_COST (optimize_insn_for_speed_p (), false) == 0)
5402 return 0;
5403
5404 /* See what we need to return. We can only return a 1, -1, or the
5405 sign bit. */
5406
5407 if (normalizep == 0)
5408 {
5409 if (STORE_FLAG_VALUE == 1 || STORE_FLAG_VALUE == -1)
5410 normalizep = STORE_FLAG_VALUE;
5411
5412 else if (val_signbit_p (mode, STORE_FLAG_VALUE))
5413 ;
5414 else
5415 return 0;
5416 }
5417
5418 last = get_last_insn ();
5419
5420 /* If optimizing, use different pseudo registers for each insn, instead
5421 of reusing the same pseudo. This leads to better CSE, but slows
5422 down the compiler, since there are more pseudos. */
5423 subtarget = (!optimize
5424 && (target_mode == mode)) ? target : NULL_RTX;
5425 trueval = GEN_INT (normalizep ? normalizep : STORE_FLAG_VALUE);
5426
5427 /* For floating-point comparisons, try the reverse comparison or try
5428 changing the "orderedness" of the comparison. */
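/* Worked example of the add/xor fixups used below (illustrative):
   with STORE_FLAG_VALUE == 1 and NORMALIZEP == -1, compute the
   reversed comparison as 0/1 and add -1, giving -1 when the original
   comparison holds and 0 when it does not.  Otherwise, XOR the
   reversed result with the desired "true" value, which flips it back
   to the un-reversed sense.  */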
5429 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
5430 {
5431 enum rtx_code first_code;
5432 bool and_them;
5433
5434 rcode = reverse_condition_maybe_unordered (code);
5435 if (can_compare_p (rcode, mode, ccp_store_flag)
5436 && (code == ORDERED || code == UNORDERED
5437 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5438 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5439 {
5440 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5441 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5442
5443 /* For the reverse comparison, use either an addition or an XOR. */
5444 if (want_add
5445 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5446 optimize_insn_for_speed_p ()) == 0)
5447 {
5448 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5449 STORE_FLAG_VALUE, target_mode);
5450 if (tem)
5451 return expand_binop (target_mode, add_optab, tem,
5452 GEN_INT (normalizep),
5453 target, 0, OPTAB_WIDEN);
5454 }
5455 else if (!want_add
5456 && rtx_cost (trueval, XOR, 1,
5457 optimize_insn_for_speed_p ()) == 0)
5458 {
5459 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5460 normalizep, target_mode);
5461 if (tem)
5462 return expand_binop (target_mode, xor_optab, tem, trueval,
5463 target, INTVAL (trueval) >= 0, OPTAB_WIDEN);
5464 }
5465 }
5466
5467 delete_insns_since (last);
5468
5469 /* Cannot split ORDERED and UNORDERED, only try the above trick. */
5470 if (code == ORDERED || code == UNORDERED)
5471 return 0;
5472
5473 and_them = split_comparison (code, mode, &first_code, &code);
5474
5475 /* If there are no NaNs, the first comparison should always fall through.
5476 Effectively change the comparison to the other one. */
5477 if (!HONOR_NANS (mode))
5478 {
5479 gcc_assert (first_code == (and_them ? ORDERED : UNORDERED));
5480 return emit_store_flag_1 (target, code, op0, op1, mode, 0, normalizep,
5481 target_mode);
5482 }
5483
5484 #ifdef HAVE_conditional_move
5485 /* Try using a setcc instruction for ORDERED/UNORDERED, followed by a
5486 conditional move. */
5487 tem = emit_store_flag_1 (subtarget, first_code, op0, op1, mode, 0,
5488 normalizep, target_mode);
5489 if (tem == 0)
5490 return 0;
5491
5492 if (and_them)
5493 tem = emit_conditional_move (target, code, op0, op1, mode,
5494 tem, const0_rtx, GET_MODE (tem), 0);
5495 else
5496 tem = emit_conditional_move (target, code, op0, op1, mode,
5497 trueval, tem, GET_MODE (tem), 0);
5498
5499 if (tem == 0)
5500 delete_insns_since (last);
5501 return tem;
5502 #else
5503 return 0;
5504 #endif
5505 }
5506
5507 /* The remaining tricks only apply to integer comparisons. */
5508
5509 if (GET_MODE_CLASS (mode) != MODE_INT)
5510 return 0;
5511
5512 /* If this is an equality comparison of integers, we can try to exclusive-or
5513 (or subtract) the two operands and use a recursive call to try the
5514 comparison with zero. Don't do any of these cases if branches are
5515 very cheap. */
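/* Concretely (sketch): "a == b" is tried as "(a ^ b) == 0", or as
   "(a - b) == 0" when no XOR pattern is available; the recursive
   emit_store_flag call then handles the simpler comparison with
   zero.  */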
5516
5517 if ((code == EQ || code == NE) && op1 != const0_rtx)
5518 {
5519 tem = expand_binop (mode, xor_optab, op0, op1, subtarget, 1,
5520 OPTAB_WIDEN);
5521
5522 if (tem == 0)
5523 tem = expand_binop (mode, sub_optab, op0, op1, subtarget, 1,
5524 OPTAB_WIDEN);
5525 if (tem != 0)
5526 tem = emit_store_flag (target, code, tem, const0_rtx,
5527 mode, unsignedp, normalizep);
5528 if (tem != 0)
5529 return tem;
5530
5531 delete_insns_since (last);
5532 }
5533
5534 /* For integer comparisons, try the reverse comparison. However, for
5535 small X, if we'd have to extend it anyway, implementing "X != 0"
5536 as "-(int)X >> 31" is still cheaper than inverting "(int)X == 0". */
5537 rcode = reverse_condition (code);
5538 if (can_compare_p (rcode, mode, ccp_store_flag)
5539 && ! (optab_handler (cstore_optab, mode) == CODE_FOR_nothing
5540 && code == NE
5541 && GET_MODE_SIZE (mode) < UNITS_PER_WORD
5542 && op1 == const0_rtx))
5543 {
5544 int want_add = ((STORE_FLAG_VALUE == 1 && normalizep == -1)
5545 || (STORE_FLAG_VALUE == -1 && normalizep == 1));
5546
5547 /* Again, for the reverse comparison, use either an addition or an XOR. */
5548 if (want_add
5549 && rtx_cost (GEN_INT (normalizep), PLUS, 1,
5550 optimize_insn_for_speed_p ()) == 0)
5551 {
5552 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5553 STORE_FLAG_VALUE, target_mode);
5554 if (tem != 0)
5555 tem = expand_binop (target_mode, add_optab, tem,
5556 GEN_INT (normalizep), target, 0, OPTAB_WIDEN);
5557 }
5558 else if (!want_add
5559 && rtx_cost (trueval, XOR, 1,
5560 optimize_insn_for_speed_p ()) == 0)
5561 {
5562 tem = emit_store_flag_1 (subtarget, rcode, op0, op1, mode, 0,
5563 normalizep, target_mode);
5564 if (tem != 0)
5565 tem = expand_binop (target_mode, xor_optab, tem, trueval, target,
5566 INTVAL (trueval) >= 0, OPTAB_WIDEN);
5567 }
5568
5569 if (tem != 0)
5570 return tem;
5571 delete_insns_since (last);
5572 }
5573
5574 /* Some other cases we can do are EQ, NE, LE, and GT comparisons with
5575 the constant zero. Reject all other comparisons at this point. Only
5576 do LE and GT if branches are expensive since they are expensive on
5577 2-operand machines. */
5578
5579 if (op1 != const0_rtx
5580 || (code != EQ && code != NE
5581 && (BRANCH_COST (optimize_insn_for_speed_p (),
5582 false) <= 1 || (code != LE && code != GT))))
5583 return 0;
5584
5585 /* Try to put the result of the comparison in the sign bit. Assume we can't
5586 do the necessary operation below. */
5587
5588 tem = 0;
5589
5590 /* To see if A <= 0, compute (A | (A - 1)). A <= 0 iff that result has
5591 the sign bit set. */
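/* Illustrative check of the identity (32-bit values): for A == 7,
   A | (A - 1) == 7, sign bit clear; for A == 0, A | -1 == -1, sign
   bit set; for A == -5 the sign bit of A is already set.  So the
   sign bit of (A | (A - 1)) is exactly "A <= 0".  */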
5592
5593 if (code == LE)
5594 {
5595 /* This is destructive, so SUBTARGET can't be OP0. */
5596 if (rtx_equal_p (subtarget, op0))
5597 subtarget = 0;
5598
5599 tem = expand_binop (mode, sub_optab, op0, const1_rtx, subtarget, 0,
5600 OPTAB_WIDEN);
5601 if (tem)
5602 tem = expand_binop (mode, ior_optab, op0, tem, subtarget, 0,
5603 OPTAB_WIDEN);
5604 }
5605
5606 /* To see if A > 0, compute (((signed) A) >> BITS) - A, where BITS is the
5607 number of bits in the mode of OP0, minus one. */
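/* Illustrative check (32-bit values): (A >> 31) is 0 for A >= 0 and
   -1 for A < 0, so (A >> 31) - A is -A (negative) when A > 0, 0 when
   A == 0, and -1 - A (non-negative) when A < 0; its sign bit is
   therefore exactly "A > 0".  */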
5608
5609 if (code == GT)
5610 {
5611 if (rtx_equal_p (subtarget, op0))
5612 subtarget = 0;
5613
5614 tem = expand_shift (RSHIFT_EXPR, mode, op0,
5615 GET_MODE_BITSIZE (mode) - 1,
5616 subtarget, 0);
5617 tem = expand_binop (mode, sub_optab, tem, op0, subtarget, 0,
5618 OPTAB_WIDEN);
5619 }
5620
5621 if (code == EQ || code == NE)
5622 {
5623 /* For EQ or NE, one way to do the comparison is to apply an operation
5624 that converts the operand into a positive number if it is nonzero
5625 or zero if it was originally zero. Then, for EQ, we subtract 1 and
5626 for NE we negate. This puts the result in the sign bit. Then we
5627 normalize with a shift, if needed.
5628
5629 Two operations that can do the above actions are ABS and FFS, so try
5630 them. If that doesn't work, and MODE is smaller than a full word,
5631 we can use zero-extension to the wider mode (an unsigned conversion)
5632 as the operation. */
5633
5634 /* Note that ABS doesn't yield a positive number for INT_MIN, but
5635 that is compensated by the subsequent overflow when subtracting
5636 one / negating. */
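/* Sketch of the EQ case with ABS (illustrative): abs (A) - 1 is -1
   (sign bit set) only when A == 0 and is non-negative otherwise; for
   A == INT_MIN, abs (A) wraps back to INT_MIN and the subtraction
   still yields the non-negative INT_MAX.  For NE, -abs (A) has the
   sign bit set exactly when A != 0.  */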
5637
5638 if (optab_handler (abs_optab, mode) != CODE_FOR_nothing)
5639 tem = expand_unop (mode, abs_optab, op0, subtarget, 1);
5640 else if (optab_handler (ffs_optab, mode) != CODE_FOR_nothing)
5641 tem = expand_unop (mode, ffs_optab, op0, subtarget, 1);
5642 else if (GET_MODE_SIZE (mode) < UNITS_PER_WORD)
5643 {
5644 tem = convert_modes (word_mode, mode, op0, 1);
5645 mode = word_mode;
5646 }
5647
5648 if (tem != 0)
5649 {
5650 if (code == EQ)
5651 tem = expand_binop (mode, sub_optab, tem, const1_rtx, subtarget,
5652 0, OPTAB_WIDEN);
5653 else
5654 tem = expand_unop (mode, neg_optab, tem, subtarget, 0);
5655 }
5656
5657 /* If we couldn't do it that way, for NE we can "or" the two's complement
5658 of the value with itself. For EQ, we take the one's complement of
5659 that "or", which is an extra insn, so we only handle EQ if branches
5660 are expensive. */
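/* Illustrative check: (-A | A) has the sign bit set for every nonzero
   A (for positive A the negation is negative, for negative A the
   value itself is) and is 0 only for A == 0; complementing it gives
   the EQ test.  */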
5661
5662 if (tem == 0
5663 && (code == NE
5664 || BRANCH_COST (optimize_insn_for_speed_p (),
5665 false) > 1))
5666 {
5667 if (rtx_equal_p (subtarget, op0))
5668 subtarget = 0;
5669
5670 tem = expand_unop (mode, neg_optab, op0, subtarget, 0);
5671 tem = expand_binop (mode, ior_optab, tem, op0, subtarget, 0,
5672 OPTAB_WIDEN);
5673
5674 if (tem && code == EQ)
5675 tem = expand_unop (mode, one_cmpl_optab, tem, subtarget, 0);
5676 }
5677 }
5678
5679 if (tem && normalizep)
5680 tem = expand_shift (RSHIFT_EXPR, mode, tem,
5681 GET_MODE_BITSIZE (mode) - 1,
5682 subtarget, normalizep == 1);
5683
5684 if (tem)
5685 {
5686 if (!target)
5687 ;
5688 else if (GET_MODE (tem) != target_mode)
5689 {
5690 convert_move (target, tem, 0);
5691 tem = target;
5692 }
5693 else if (!subtarget)
5694 {
5695 emit_move_insn (target, tem);
5696 tem = target;
5697 }
5698 }
5699 else
5700 delete_insns_since (last);
5701
5702 return tem;
5703 }
5704
5705 /* Like emit_store_flag, but always succeeds. */
5706
5707 rtx
5708 emit_store_flag_force (rtx target, enum rtx_code code, rtx op0, rtx op1,
5709 enum machine_mode mode, int unsignedp, int normalizep)
5710 {
5711 rtx tem, label;
5712 rtx trueval, falseval;
5713
5714 /* First see if emit_store_flag can do the job. */
5715 tem = emit_store_flag (target, code, op0, op1, mode, unsignedp, normalizep);
5716 if (tem != 0)
5717 return tem;
5718
5719 if (!target)
5720 target = gen_reg_rtx (word_mode);
5721
5722 /* If this failed, we have to do this with set/compare/jump/set code.
5723 For foo != 0, if foo is in OP0, just replace it with 1 if nonzero. */
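/* The shape of the code emitted for that special case is roughly
   (sketch in C terms):

     if (target == 0)
       goto done;         -- comparison false, 0 is already there
     target = trueval;    -- comparison true
   done:;                                                           */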
5724 trueval = normalizep ? GEN_INT (normalizep) : const1_rtx;
5725 if (code == NE
5726 && GET_MODE_CLASS (mode) == MODE_INT
5727 && REG_P (target)
5728 && op0 == target
5729 && op1 == const0_rtx)
5730 {
5731 label = gen_label_rtx ();
5732 do_compare_rtx_and_jump (target, const0_rtx, EQ, unsignedp,
5733 mode, NULL_RTX, NULL_RTX, label, -1);
5734 emit_move_insn (target, trueval);
5735 emit_label (label);
5736 return target;
5737 }
5738
5739 if (!REG_P (target)
5740 || reg_mentioned_p (target, op0) || reg_mentioned_p (target, op1))
5741 target = gen_reg_rtx (GET_MODE (target));
5742
5743 /* Jump in the right direction if the target cannot implement CODE
5744 but can jump on its reverse condition. */
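/* For example (illustrative): if the machine cannot branch on LTGT
   but can branch on its reverse, UNEQ, the roles are swapped below:
   TARGET is first set to 0, the branch is taken on UNEQ (skipping
   the second store), and the fall-through store supplies the
   original true value.  */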
5745 falseval = const0_rtx;
5746 if (! can_compare_p (code, mode, ccp_jump)
5747 && (! FLOAT_MODE_P (mode)
5748 || code == ORDERED || code == UNORDERED
5749 || (! HONOR_NANS (mode) && (code == LTGT || code == UNEQ))
5750 || (! HONOR_SNANS (mode) && (code == EQ || code == NE))))
5751 {
5752 enum rtx_code rcode;
5753 if (FLOAT_MODE_P (mode))
5754 rcode = reverse_condition_maybe_unordered (code);
5755 else
5756 rcode = reverse_condition (code);
5757
5758 /* Canonicalize to UNORDERED for the libcall. */
5759 if (can_compare_p (rcode, mode, ccp_jump)
5760 || (code == ORDERED && ! can_compare_p (ORDERED, mode, ccp_jump)))
5761 {
5762 falseval = trueval;
5763 trueval = const0_rtx;
5764 code = rcode;
5765 }
5766 }
5767
5768 emit_move_insn (target, trueval);
5769 label = gen_label_rtx ();
5770 do_compare_rtx_and_jump (op0, op1, code, unsignedp, mode, NULL_RTX,
5771 NULL_RTX, label, -1);
5772
5773 emit_move_insn (target, falseval);
5774 emit_label (label);
5775
5776 return target;
5777 }
5778 \f
5779 /* Perform possibly multi-word comparison and conditional jump to LABEL
5780 if ARG1 OP ARG2 is true, where ARG1 and ARG2 are of mode MODE. This is
5781 now a thin wrapper around do_compare_rtx_and_jump. */
5782
5783 static void
5784 do_cmp_and_jump (rtx arg1, rtx arg2, enum rtx_code op, enum machine_mode mode,
5785 rtx label)
5786 {
5787 int unsignedp = (op == LTU || op == LEU || op == GTU || op == GEU);
5788 do_compare_rtx_and_jump (arg1, arg2, op, unsignedp, mode,
5789 NULL_RTX, NULL_RTX, label, -1);
5790 }