gcc/config/arm/neon.md

   1 ;; ARM NEON coprocessor Machine Description
   2 ;; Copyright (C) 2006-2019 Free Software Foundation, Inc.
   3 ;; Written by CodeSourcery.
   4 ;;
   5 ;; This file is part of GCC.
   6 ;;
   7 ;; GCC is free software; you can redistribute it and/or modify it
   8 ;; under the terms of the GNU General Public License as published by
   9 ;; the Free Software Foundation; either version 3, or (at your option)
  10 ;; any later version.
  11 ;;
  12 ;; GCC is distributed in the hope that it will be useful, but
  13 ;; WITHOUT ANY WARRANTY; without even the implied warranty of
  14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  15 ;; General Public License for more details.
  16 ;;
  17 ;; You should have received a copy of the GNU General Public License
  18 ;; along with GCC; see the file COPYING3.  If not see
  19 ;; <http://www.gnu.org/licenses/>.
  20
  21
  22 ;; Attribute used to permit string comparisons against <VQH_mnem> in
  23 ;; type attribute definitions.
  24 (define_attr "vqh_mnem" "vadd,vmin,vmax" (const_string "vadd"))
  25
  26 (define_insn "*neon_mov<mode>"
  27   [(set (match_operand:VDX 0 "nonimmediate_operand"
  28           "=w,Un,w, w, w,  ?r,?w,?r, ?Us,*r")
  29         (match_operand:VDX 1 "general_operand"
  30           " w,w, Dm,Dn,Uni, w, r, Usi,r,*r"))]
  31   "TARGET_NEON
  32    && (register_operand (operands[0], <MODE>mode)
  33        || register_operand (operands[1], <MODE>mode))"
  34 {
  35   if (which_alternative == 2 || which_alternative == 3)
  36     {
  37       int width, is_valid;
  38       static char templ[40];
  39
  40       is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
  41         &operands[1], &width);
  42
  43       gcc_assert (is_valid != 0);
  44
  45       if (width == 0)
  46         return "vmov.f32\t%P0, %1  @ <mode>";
  47       else
  48         sprintf (templ, "vmov.i%d\t%%P0, %%x1  @ <mode>", width);
  49
  50       return templ;
  51     }
  52
  53   switch (which_alternative)
  54     {
  55     case 0: return "vmov\t%P0, %P1  @ <mode>";
  56     case 1: case 4: return output_move_neon (operands);
  57     case 2: case 3: gcc_unreachable ();
  58     case 5: return "vmov\t%Q0, %R0, %P1  @ <mode>";
  59     case 6: return "vmov\t%P0, %Q1, %R1  @ <mode>";
  60     case 9: return "#";
  61     default: return output_move_double (operands, true, NULL);
  62     }
  63 }
  64  [(set_attr "type" "neon_move<q>,neon_store1_1reg,neon_move<q>,\
  65                     neon_move<q>,neon_load1_1reg, neon_to_gp<q>,\
  66                     neon_from_gp<q>,neon_load1_2reg, neon_store1_2reg,\
  67                     multiple")
  68   (set_attr "length" "4,4,4,4,4,4,4,8,8,8")
  69   (set_attr "arm_pool_range"     "*,*,*,*,1020,*,*,1020,*,*")
  70   (set_attr "thumb2_pool_range"     "*,*,*,*,1018,*,*,1018,*,*")
  71   (set_attr "neg_pool_range" "*,*,*,*,1004,*,*,1004,*,*")])
  72
  73 (define_insn "*neon_mov<mode>"
  74   [(set (match_operand:VQXMOV 0 "nonimmediate_operand"
  75           "=w,Un,w, w, w,  ?r,?w,?r,?r,  ?Us")
  76         (match_operand:VQXMOV 1 "general_operand"
  77           " w,w, Dm,DN,Uni, w, r, r, Usi, r"))]
  78   "TARGET_NEON
  79    && (register_operand (operands[0], <MODE>mode)
  80        || register_operand (operands[1], <MODE>mode))"
  81 {
  82   if (which_alternative == 2 || which_alternative == 3)
  83     {
  84       int width, is_valid;
  85       static char templ[40];
  86
  87       is_valid = neon_immediate_valid_for_move (operands[1], <MODE>mode,
  88         &operands[1], &width);
  89
  90       gcc_assert (is_valid != 0);
  91
  92       if (width == 0)
  93         return "vmov.f32\t%q0, %1  @ <mode>";
  94       else
  95         sprintf (templ, "vmov.i%d\t%%q0, %%1  @ <mode>", width);
  96
  97       return templ;
  98     }
  99
 100   switch (which_alternative)
 101     {
 102     case 0: return "vmov\t%q0, %q1  @ <mode>";
 103     case 1: case 4: return output_move_neon (operands);
 104     case 2: case 3: gcc_unreachable ();
 105     case 5: return "vmov\t%Q0, %R0, %e1  @ <mode>\;vmov\t%J0, %K0, %f1";
 106     case 6: return "vmov\t%e0, %Q1, %R1  @ <mode>\;vmov\t%f0, %J1, %K1";
 107     default: return output_move_quad (operands);
 108     }
 109 }
 110   [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_move_q,\
 111                      neon_move_q,neon_load2_2reg_q,neon_to_gp_q,\
 112                      neon_from_gp_q,mov_reg,neon_load1_4reg,neon_store1_4reg")
 113    (set_attr "length" "4,8,4,4,8,8,8,16,8,16")
 114    (set_attr "arm_pool_range" "*,*,*,*,1020,*,*,*,1020,*")
 115    (set_attr "thumb2_pool_range" "*,*,*,*,1018,*,*,*,1018,*")
 116    (set_attr "neg_pool_range" "*,*,*,*,996,*,*,*,996,*")])
 117
 118 /* We define these mov expanders to match the standard mov$a optab to prevent
 119    the mid-end from trying to do a subreg for these modes which is the most
 120    inefficient way to expand the move.  Also big-endian subreg's aren't
 121    allowed for a subset of modes, See TARGET_CAN_CHANGE_MODE_CLASS.
 122    Without these RTL generation patterns the mid-end would attempt to take a
 123    sub-reg and may ICE if it can't.  */
 124
 125 (define_expand "movti"
 126   [(set (match_operand:TI 0 "nonimmediate_operand")
 127         (match_operand:TI 1 "general_operand"))]
 128   "TARGET_NEON"
 129 {
 130   if (can_create_pseudo_p ())
 131     {
 132       if (!REG_P (operands[0]))
 133         operands[1] = force_reg (TImode, operands[1]);
 134     }
 135 })
 136
 137 (define_expand "mov<mode>"
 138   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand")
 139         (match_operand:VSTRUCT 1 "general_operand"))]
 140   "TARGET_NEON"
 141 {
 142   if (can_create_pseudo_p ())
 143     {
 144       if (!REG_P (operands[0]))
 145         operands[1] = force_reg (<MODE>mode, operands[1]);
 146     }
 147 })
 148
 149 (define_expand "mov<mode>"
 150   [(set (match_operand:VH 0 "s_register_operand")
 151         (match_operand:VH 1 "s_register_operand"))]
 152   "TARGET_NEON"
 153 {
 154   if (can_create_pseudo_p ())
 155     {
 156       if (!REG_P (operands[0]))
 157         operands[1] = force_reg (<MODE>mode, operands[1]);
 158     }
 159 })
 160
 161 (define_insn "*neon_mov<mode>"
 162   [(set (match_operand:VSTRUCT 0 "nonimmediate_operand" "=w,Ut,w")
 163         (match_operand:VSTRUCT 1 "general_operand"      " w,w, Ut"))]
 164   "TARGET_NEON
 165    && (register_operand (operands[0], <MODE>mode)
 166        || register_operand (operands[1], <MODE>mode))"
 167 {
 168   switch (which_alternative)
 169     {
 170     case 0: return "#";
 171     case 1: case 2: return output_move_neon (operands);
 172     default: gcc_unreachable ();
 173     }
 174 }
 175   [(set_attr "type" "neon_move_q,neon_store2_2reg_q,neon_load2_2reg_q")
 176    (set (attr "length") (symbol_ref "arm_attr_length_move_neon (insn)"))])
 177
 178 (define_split
 179   [(set (match_operand:EI 0 "s_register_operand" "")
 180         (match_operand:EI 1 "s_register_operand" ""))]
 181   "TARGET_NEON && reload_completed"
 182   [(set (match_dup 0) (match_dup 1))
 183    (set (match_dup 2) (match_dup 3))]
 184 {
 185   int rdest = REGNO (operands[0]);
 186   int rsrc = REGNO (operands[1]);
 187   rtx dest[2], src[2];
 188
 189   dest[0] = gen_rtx_REG (TImode, rdest);
 190   src[0] = gen_rtx_REG (TImode, rsrc);
 191   dest[1] = gen_rtx_REG (DImode, rdest + 4);
 192   src[1] = gen_rtx_REG (DImode, rsrc + 4);
 193
 194   neon_disambiguate_copy (operands, dest, src, 2);
 195 })
 196
 197 (define_split
 198   [(set (match_operand:OI 0 "s_register_operand" "")
 199         (match_operand:OI 1 "s_register_operand" ""))]
 200   "TARGET_NEON && reload_completed"
 201   [(set (match_dup 0) (match_dup 1))
 202    (set (match_dup 2) (match_dup 3))]
 203 {
 204   int rdest = REGNO (operands[0]);
 205   int rsrc = REGNO (operands[1]);
 206   rtx dest[2], src[2];
 207
 208   dest[0] = gen_rtx_REG (TImode, rdest);
 209   src[0] = gen_rtx_REG (TImode, rsrc);
 210   dest[1] = gen_rtx_REG (TImode, rdest + 4);
 211   src[1] = gen_rtx_REG (TImode, rsrc + 4);
 212
 213   neon_disambiguate_copy (operands, dest, src, 2);
 214 })
 215
 216 (define_split
 217   [(set (match_operand:CI 0 "s_register_operand" "")
 218         (match_operand:CI 1 "s_register_operand" ""))]
 219   "TARGET_NEON && reload_completed"
 220   [(set (match_dup 0) (match_dup 1))
 221    (set (match_dup 2) (match_dup 3))
 222    (set (match_dup 4) (match_dup 5))]
 223 {
 224   int rdest = REGNO (operands[0]);
 225   int rsrc = REGNO (operands[1]);
 226   rtx dest[3], src[3];
 227
 228   dest[0] = gen_rtx_REG (TImode, rdest);
 229   src[0] = gen_rtx_REG (TImode, rsrc);
 230   dest[1] = gen_rtx_REG (TImode, rdest + 4);
 231   src[1] = gen_rtx_REG (TImode, rsrc + 4);
 232   dest[2] = gen_rtx_REG (TImode, rdest + 8);
 233   src[2] = gen_rtx_REG (TImode, rsrc + 8);
 234
 235   neon_disambiguate_copy (operands, dest, src, 3);
 236 })
 237
 238 (define_split
 239   [(set (match_operand:XI 0 "s_register_operand" "")
 240         (match_operand:XI 1 "s_register_operand" ""))]
 241   "TARGET_NEON && reload_completed"
 242   [(set (match_dup 0) (match_dup 1))
 243    (set (match_dup 2) (match_dup 3))
 244    (set (match_dup 4) (match_dup 5))
 245    (set (match_dup 6) (match_dup 7))]
 246 {
 247   int rdest = REGNO (operands[0]);
 248   int rsrc = REGNO (operands[1]);
 249   rtx dest[4], src[4];
 250
 251   dest[0] = gen_rtx_REG (TImode, rdest);
 252   src[0] = gen_rtx_REG (TImode, rsrc);
 253   dest[1] = gen_rtx_REG (TImode, rdest + 4);
 254   src[1] = gen_rtx_REG (TImode, rsrc + 4);
 255   dest[2] = gen_rtx_REG (TImode, rdest + 8);
 256   src[2] = gen_rtx_REG (TImode, rsrc + 8);
 257   dest[3] = gen_rtx_REG (TImode, rdest + 12);
 258   src[3] = gen_rtx_REG (TImode, rsrc + 12);
 259
 260   neon_disambiguate_copy (operands, dest, src, 4);
 261 })
 262
 263 (define_expand "movmisalign<mode>"
 264   [(set (match_operand:VDQX 0 "neon_perm_struct_or_reg_operand")
 265         (unspec:VDQX [(match_operand:VDQX 1 "neon_perm_struct_or_reg_operand")]
 266                      UNSPEC_MISALIGNED_ACCESS))]
 267   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
 268 {
 269   rtx adjust_mem;
 270   /* This pattern is not permitted to fail during expansion: if both arguments
 271      are non-registers (e.g. memory := constant, which can be created by the
 272      auto-vectorizer), force operand 1 into a register.  */
 273   if (!s_register_operand (operands[0], <MODE>mode)
 274       && !s_register_operand (operands[1], <MODE>mode))
 275     operands[1] = force_reg (<MODE>mode, operands[1]);
 276
 277   if (s_register_operand (operands[0], <MODE>mode))
 278     adjust_mem = operands[1];
 279   else
 280     adjust_mem = operands[0];
 281
 282   /* Legitimize address.  */
 283   if (!neon_vector_mem_operand (adjust_mem, 2, true))
 284     XEXP (adjust_mem, 0) = force_reg (Pmode, XEXP (adjust_mem, 0));
 285
 286 })
 287
 288 (define_insn "*movmisalign<mode>_neon_store"
 289   [(set (match_operand:VDX 0 "neon_permissive_struct_operand"   "=Um")
 290         (unspec:VDX [(match_operand:VDX 1 "s_register_operand" " w")]
 291                     UNSPEC_MISALIGNED_ACCESS))]
 292   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
 293   "vst1.<V_sz_elem>\t{%P1}, %A0"
 294   [(set_attr "type" "neon_store1_1reg<q>")])
 295
 296 (define_insn "*movmisalign<mode>_neon_load"
 297   [(set (match_operand:VDX 0 "s_register_operand"                       "=w")
 298         (unspec:VDX [(match_operand:VDX 1 "neon_permissive_struct_operand"
 299                                                                         " Um")]
 300                     UNSPEC_MISALIGNED_ACCESS))]
 301   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
 302   "vld1.<V_sz_elem>\t{%P0}, %A1"
 303   [(set_attr "type" "neon_load1_1reg<q>")])
 304
 305 (define_insn "*movmisalign<mode>_neon_store"
 306   [(set (match_operand:VQX 0 "neon_permissive_struct_operand"  "=Um")
 307         (unspec:VQX [(match_operand:VQX 1 "s_register_operand" " w")]
 308                     UNSPEC_MISALIGNED_ACCESS))]
 309   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
 310   "vst1.<V_sz_elem>\t{%q1}, %A0"
 311   [(set_attr "type" "neon_store1_1reg<q>")])
 312
 313 (define_insn "*movmisalign<mode>_neon_load"
 314   [(set (match_operand:VQX 0 "s_register_operand"                       "=w")
 315         (unspec:VQX [(match_operand:VQX 1 "neon_permissive_struct_operand"
 316                                                                         " Um")]
 317                     UNSPEC_MISALIGNED_ACCESS))]
 318   "TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access"
 319   "vld1.<V_sz_elem>\t{%q0}, %A1"
 320   [(set_attr "type" "neon_load1_1reg<q>")])
 321
 322 (define_insn "@vec_set<mode>_internal"
 323   [(set (match_operand:VD_LANE 0 "s_register_operand" "=w,w")
 324         (vec_merge:VD_LANE
 325           (vec_duplicate:VD_LANE
 326             (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
 327           (match_operand:VD_LANE 3 "s_register_operand" "0,0")
 328           (match_operand:SI 2 "immediate_operand" "i,i")))]
 329   "TARGET_NEON"
 330 {
 331   int elt = ffs ((int) INTVAL (operands[2])) - 1;
 332   if (BYTES_BIG_ENDIAN)
 333     elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
 334   operands[2] = GEN_INT (elt);
 335
 336   if (which_alternative == 0)
 337     return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
 338   else
 339     return "vmov.<V_sz_elem>\t%P0[%c2], %1";
 340 }
 341   [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")])
 342
 343 (define_insn "@vec_set<mode>_internal"
 344   [(set (match_operand:VQ2 0 "s_register_operand" "=w,w")
 345         (vec_merge:VQ2
 346           (vec_duplicate:VQ2
 347             (match_operand:<V_elem> 1 "nonimmediate_operand" "Um,r"))
 348           (match_operand:VQ2 3 "s_register_operand" "0,0")
 349           (match_operand:SI 2 "immediate_operand" "i,i")))]
 350   "TARGET_NEON"
 351 {
 352   HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
 353   int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
 354   int elt = elem % half_elts;
 355   int hi = (elem / half_elts) * 2;
 356   int regno = REGNO (operands[0]);
 357
 358   if (BYTES_BIG_ENDIAN)
 359     elt = half_elts - 1 - elt;
 360
 361   operands[0] = gen_rtx_REG (<V_HALF>mode, regno + hi);
 362   operands[2] = GEN_INT (elt);
 363
 364   if (which_alternative == 0)
 365     return "vld1.<V_sz_elem>\t{%P0[%c2]}, %A1";
 366   else
 367     return "vmov.<V_sz_elem>\t%P0[%c2], %1";
 368 }
 369   [(set_attr "type" "neon_load1_all_lanes<q>,neon_from_gp<q>")]
 370 )
 371
 372 (define_insn "@vec_set<mode>_internal"
 373   [(set (match_operand:V2DI_ONLY 0 "s_register_operand" "=w,w")
 374         (vec_merge:V2DI_ONLY
 375           (vec_duplicate:V2DI_ONLY
 376             (match_operand:DI 1 "nonimmediate_operand" "Um,r"))
 377           (match_operand:V2DI_ONLY 3 "s_register_operand" "0,0")
 378           (match_operand:SI 2 "immediate_operand" "i,i")))]
 379   "TARGET_NEON"
 380 {
 381   HOST_WIDE_INT elem = ffs ((int) INTVAL (operands[2])) - 1;
 382   int regno = REGNO (operands[0]) + 2 * elem;
 383
 384   operands[0] = gen_rtx_REG (DImode, regno);
 385
 386   if (which_alternative == 0)
 387     return "vld1.64\t%P0, %A1";
 388   else
 389     return "vmov\t%P0, %Q1, %R1";
 390 }
 391   [(set_attr "type" "neon_load1_all_lanes_q,neon_from_gp_q")]
 392 )
 393
 394 (define_expand "vec_set<mode>"
 395   [(match_operand:VDQ 0 "s_register_operand")
 396    (match_operand:<V_elem> 1 "s_register_operand")
 397    (match_operand:SI 2 "immediate_operand")]
 398   "TARGET_NEON"
 399 {
 400   HOST_WIDE_INT elem = HOST_WIDE_INT_1 << INTVAL (operands[2]);
 401   emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
 402                                          GEN_INT (elem), operands[0]));
 403   DONE;
 404 })
 405
 406 (define_insn "vec_extract<mode><V_elem_l>"
 407   [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
 408         (vec_select:<V_elem>
 409           (match_operand:VD_LANE 1 "s_register_operand" "w,w")
 410           (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
 411   "TARGET_NEON"
 412 {
 413   if (BYTES_BIG_ENDIAN)
 414     {
 415       int elt = INTVAL (operands[2]);
 416       elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
 417       operands[2] = GEN_INT (elt);
 418     }
 419
 420   if (which_alternative == 0)
 421     return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
 422   else
 423     return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
 424 }
 425   [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
 426 )
 427
 428 (define_insn "vec_extract<mode><V_elem_l>"
 429   [(set (match_operand:<V_elem> 0 "nonimmediate_operand" "=Um,r")
 430         (vec_select:<V_elem>
 431           (match_operand:VQ2 1 "s_register_operand" "w,w")
 432           (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
 433   "TARGET_NEON"
 434 {
 435   int half_elts = GET_MODE_NUNITS (<MODE>mode) / 2;
 436   int elt = INTVAL (operands[2]) % half_elts;
 437   int hi = (INTVAL (operands[2]) / half_elts) * 2;
 438   int regno = REGNO (operands[1]);
 439
 440   if (BYTES_BIG_ENDIAN)
 441     elt = half_elts - 1 - elt;
 442
 443   operands[1] = gen_rtx_REG (<V_HALF>mode, regno + hi);
 444   operands[2] = GEN_INT (elt);
 445
 446   if (which_alternative == 0)
 447     return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
 448   else
 449     return "vmov.<V_uf_sclr>\t%0, %P1[%c2]";
 450 }
 451   [(set_attr "type" "neon_store1_one_lane<q>,neon_to_gp<q>")]
 452 )
 453
 454 (define_insn "vec_extractv2didi"
 455   [(set (match_operand:DI 0 "nonimmediate_operand" "=Um,r")
 456         (vec_select:DI
 457           (match_operand:V2DI 1 "s_register_operand" "w,w")
 458           (parallel [(match_operand:SI 2 "immediate_operand" "i,i")])))]
 459   "TARGET_NEON"
 460 {
 461   int regno = REGNO (operands[1]) + 2 * INTVAL (operands[2]);
 462
 463   operands[1] = gen_rtx_REG (DImode, regno);
 464
 465   if (which_alternative == 0)
 466     return "vst1.64\t{%P1}, %A0  @ v2di";
 467   else
 468     return "vmov\t%Q0, %R0, %P1  @ v2di";
 469 }
 470   [(set_attr "type" "neon_store1_one_lane_q,neon_to_gp_q")]
 471 )
 472
 473 (define_expand "vec_init<mode><V_elem_l>"
 474   [(match_operand:VDQ 0 "s_register_operand")
 475    (match_operand 1 "" "")]
 476   "TARGET_NEON"
 477 {
 478   neon_expand_vector_init (operands[0], operands[1]);
 479   DONE;
 480 })
 481
 482 ;; Doubleword and quadword arithmetic.
 483
 484 ;; NOTE: some other instructions also support 64-bit integer
 485 ;; element size, which we could potentially use for "long long" operations.
 486
 487 (define_insn "*add<mode>3_neon"
 488   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
 489         (plus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
 490                   (match_operand:VDQ 2 "s_register_operand" "w")))]
 491   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
 492   "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 493   [(set (attr "type")
 494       (if_then_else (match_test "<Is_float_mode>")
 495                     (const_string "neon_fp_addsub_s<q>")
 496                     (const_string "neon_add<q>")))]
 497 )
 498
 499 ;; As with SFmode, full support for HFmode vector arithmetic is only available
 500 ;; when flag-unsafe-math-optimizations is enabled.
 501
 502 (define_insn "add<mode>3"
 503   [(set
 504     (match_operand:VH 0 "s_register_operand" "=w")
 505     (plus:VH
 506      (match_operand:VH 1 "s_register_operand" "w")
 507      (match_operand:VH 2 "s_register_operand" "w")))]
 508  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
 509  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 510  [(set (attr "type")
 511    (if_then_else (match_test "<Is_float_mode>")
 512     (const_string "neon_fp_addsub_s<q>")
 513     (const_string "neon_add<q>")))]
 514 )
 515
 516 (define_insn "add<mode>3_fp16"
 517   [(set
 518     (match_operand:VH 0 "s_register_operand" "=w")
 519     (plus:VH
 520      (match_operand:VH 1 "s_register_operand" "w")
 521      (match_operand:VH 2 "s_register_operand" "w")))]
 522  "TARGET_NEON_FP16INST"
 523  "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 524  [(set (attr "type")
 525    (if_then_else (match_test "<Is_float_mode>")
 526     (const_string "neon_fp_addsub_s<q>")
 527     (const_string "neon_add<q>")))]
 528 )
 529
 530 (define_insn "adddi3_neon"
 531   [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?w,?&r,?&r,?&r")
 532         (plus:DI (match_operand:DI 1 "s_register_operand" "%w,0,0,w,r,0,r")
 533                  (match_operand:DI 2 "arm_adddi_operand"     "w,r,0,w,r,Dd,Dd")))
 534    (clobber (reg:CC CC_REGNUM))]
 535   "TARGET_NEON"
 536 {
 537   switch (which_alternative)
 538     {
 539     case 0: /* fall through */
 540     case 3: return "vadd.i64\t%P0, %P1, %P2";
 541     case 1: return "#";
 542     case 2: return "#";
 543     case 4: return "#";
 544     case 5: return "#";
 545     case 6: return "#";
 546     default: gcc_unreachable ();
 547     }
 548 }
 549   [(set_attr "type" "neon_add,multiple,multiple,neon_add,\
 550                      multiple,multiple,multiple")
 551    (set_attr "conds" "*,clob,clob,*,clob,clob,clob")
 552    (set_attr "length" "*,8,8,*,8,8,8")
 553    (set_attr "arch" "neon_for_64bits,*,*,avoid_neon_for_64bits,*,*,*")]
 554 )
 555
 556 (define_insn "*sub<mode>3_neon"
 557   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
 558         (minus:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
 559                    (match_operand:VDQ 2 "s_register_operand" "w")))]
 560   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
 561   "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 562   [(set (attr "type")
 563       (if_then_else (match_test "<Is_float_mode>")
 564                     (const_string "neon_fp_addsub_s<q>")
 565                     (const_string "neon_sub<q>")))]
 566 )
 567
 568 (define_insn "sub<mode>3"
 569  [(set
 570    (match_operand:VH 0 "s_register_operand" "=w")
 571    (minus:VH
 572     (match_operand:VH 1 "s_register_operand" "w")
 573     (match_operand:VH 2 "s_register_operand" "w")))]
 574  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
 575  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 576  [(set_attr "type" "neon_sub<q>")]
 577 )
 578
 579 (define_insn "sub<mode>3_fp16"
 580  [(set
 581    (match_operand:VH 0 "s_register_operand" "=w")
 582    (minus:VH
 583     (match_operand:VH 1 "s_register_operand" "w")
 584     (match_operand:VH 2 "s_register_operand" "w")))]
 585  "TARGET_NEON_FP16INST"
 586  "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 587  [(set_attr "type" "neon_sub<q>")]
 588 )
 589
 590 (define_insn "subdi3_neon"
 591   [(set (match_operand:DI 0 "s_register_operand" "=w,?&r,?&r,?&r,?w")
 592         (minus:DI (match_operand:DI 1 "s_register_operand" "w,0,r,0,w")
 593                   (match_operand:DI 2 "s_register_operand" "w,r,0,0,w")))
 594    (clobber (reg:CC CC_REGNUM))]
 595   "TARGET_NEON"
 596 {
 597   switch (which_alternative)
 598     {
 599     case 0: /* fall through */
 600     case 4: return "vsub.i64\t%P0, %P1, %P2";
 601     case 1: /* fall through */
 602     case 2: /* fall through */
 603     case 3: return  "subs\\t%Q0, %Q1, %Q2\;sbc\\t%R0, %R1, %R2";
 604     default: gcc_unreachable ();
 605     }
 606 }
 607   [(set_attr "type" "neon_sub,multiple,multiple,multiple,neon_sub")
 608    (set_attr "conds" "*,clob,clob,clob,*")
 609    (set_attr "length" "*,8,8,8,*")
 610    (set_attr "arch" "neon_for_64bits,*,*,*,avoid_neon_for_64bits")]
 611 )
 612
 613 (define_insn "*mul<mode>3_neon"
 614   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
 615         (mult:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
 616                    (match_operand:VDQW 2 "s_register_operand" "w")))]
 617   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
 618   "vmul.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 619   [(set (attr "type")
 620       (if_then_else (match_test "<Is_float_mode>")
 621                     (const_string "neon_fp_mul_s<q>")
 622                     (const_string "neon_mul_<V_elem_ch><q>")))]
 623 )
 624
 625 /* Perform division using multiply-by-reciprocal.
 626    Reciprocal is calculated using Newton-Raphson method.
 627    Enabled with -funsafe-math-optimizations -freciprocal-math
 628    and disabled for -Os since it increases code size .  */
 629
 630 (define_expand "div<mode>3"
 631   [(set (match_operand:VCVTF 0 "s_register_operand")
 632         (div:VCVTF (match_operand:VCVTF 1 "s_register_operand")
 633                   (match_operand:VCVTF 2 "s_register_operand")))]
 634   "TARGET_NEON && !optimize_size
 635    && flag_reciprocal_math"
 636   {
 637     rtx rec = gen_reg_rtx (<MODE>mode);
 638     rtx vrecps_temp = gen_reg_rtx (<MODE>mode);
 639
 640     /* Reciprocal estimate.  */
 641     emit_insn (gen_neon_vrecpe<mode> (rec, operands[2]));
 642
 643     /* Perform 2 iterations of newton-raphson method.  */
 644     for (int i = 0; i < 2; i++)
 645       {
 646         emit_insn (gen_neon_vrecps<mode> (vrecps_temp, rec, operands[2]));
 647         emit_insn (gen_mul<mode>3 (rec, rec, vrecps_temp));
 648       }
 649
 650     /* We now have reciprocal in rec, perform operands[0] = operands[1] * rec.  */
 651     emit_insn (gen_mul<mode>3 (operands[0], operands[1], rec));
 652     DONE;
 653   }
 654 )
 655
 656
 657 (define_insn "mul<mode>3add<mode>_neon"
 658   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
 659         (plus:VDQW (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
 660                             (match_operand:VDQW 3 "s_register_operand" "w"))
 661                   (match_operand:VDQW 1 "s_register_operand" "0")))]
 662   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
 663   "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
 664   [(set (attr "type")
 665       (if_then_else (match_test "<Is_float_mode>")
 666                     (const_string "neon_fp_mla_s<q>")
 667                     (const_string "neon_mla_<V_elem_ch><q>")))]
 668 )
 669
 670 (define_insn "mul<mode>3add<mode>_neon"
 671   [(set (match_operand:VH 0 "s_register_operand" "=w")
 672         (plus:VH (mult:VH (match_operand:VH 2 "s_register_operand" "w")
 673                           (match_operand:VH 3 "s_register_operand" "w"))
 674                   (match_operand:VH 1 "s_register_operand" "0")))]
 675   "TARGET_NEON_FP16INST && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
 676   "vmla.f16\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
 677   [(set_attr "type" "neon_fp_mla_s<q>")]
 678 )
 679
 680 (define_insn "mul<mode>3neg<mode>add<mode>_neon"
 681   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
 682         (minus:VDQW (match_operand:VDQW 1 "s_register_operand" "0")
 683                     (mult:VDQW (match_operand:VDQW 2 "s_register_operand" "w")
 684                                (match_operand:VDQW 3 "s_register_operand" "w"))))]
 685   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
 686   "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
 687   [(set (attr "type")
 688       (if_then_else (match_test "<Is_float_mode>")
 689                     (const_string "neon_fp_mla_s<q>")
 690                     (const_string "neon_mla_<V_elem_ch><q>")))]
 691 )
 692
 693 ;; Fused multiply-accumulate
 694 ;; We define each insn twice here:
 695 ;;    1: with flag_unsafe_math_optimizations for the widening multiply phase
 696 ;;       to be able to use when converting to FMA.
 697 ;;    2: without flag_unsafe_math_optimizations for the intrinsics to use.
 698 (define_insn "fma<VCVTF:mode>4"
 699   [(set (match_operand:VCVTF 0 "register_operand" "=w")
 700         (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
 701                  (match_operand:VCVTF 2 "register_operand" "w")
 702                  (match_operand:VCVTF 3 "register_operand" "0")))]
 703   "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
 704   "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 705   [(set_attr "type" "neon_fp_mla_s<q>")]
 706 )
 707
 708 (define_insn "fma<VCVTF:mode>4_intrinsic"
 709   [(set (match_operand:VCVTF 0 "register_operand" "=w")
 710         (fma:VCVTF (match_operand:VCVTF 1 "register_operand" "w")
 711                  (match_operand:VCVTF 2 "register_operand" "w")
 712                  (match_operand:VCVTF 3 "register_operand" "0")))]
 713   "TARGET_NEON && TARGET_FMA"
 714   "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 715   [(set_attr "type" "neon_fp_mla_s<q>")]
 716 )
 717
 718 (define_insn "fma<VH:mode>4"
 719  [(set (match_operand:VH 0 "register_operand" "=w")
 720    (fma:VH
 721     (match_operand:VH 1 "register_operand" "w")
 722     (match_operand:VH 2 "register_operand" "w")
 723     (match_operand:VH 3 "register_operand" "0")))]
 724  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
 725  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 726  [(set_attr "type" "neon_fp_mla_s<q>")]
 727 )
 728
 729 (define_insn "fma<VH:mode>4_intrinsic"
 730  [(set (match_operand:VH 0 "register_operand" "=w")
 731    (fma:VH
 732     (match_operand:VH 1 "register_operand" "w")
 733     (match_operand:VH 2 "register_operand" "w")
 734     (match_operand:VH 3 "register_operand" "0")))]
 735  "TARGET_NEON_FP16INST"
 736  "vfma.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 737  [(set_attr "type" "neon_fp_mla_s<q>")]
 738 )
 739
 740 (define_insn "*fmsub<VCVTF:mode>4"
 741   [(set (match_operand:VCVTF 0 "register_operand" "=w")
 742         (fma:VCVTF (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
 743                    (match_operand:VCVTF 2 "register_operand" "w")
 744                    (match_operand:VCVTF 3 "register_operand" "0")))]
 745   "TARGET_NEON && TARGET_FMA && flag_unsafe_math_optimizations"
 746   "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 747   [(set_attr "type" "neon_fp_mla_s<q>")]
 748 )
 749
 750 (define_insn "fmsub<VCVTF:mode>4_intrinsic"
 751  [(set (match_operand:VCVTF 0 "register_operand" "=w")
 752    (fma:VCVTF
 753     (neg:VCVTF (match_operand:VCVTF 1 "register_operand" "w"))
 754     (match_operand:VCVTF 2 "register_operand" "w")
 755     (match_operand:VCVTF 3 "register_operand" "0")))]
 756  "TARGET_NEON && TARGET_FMA"
 757  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 758  [(set_attr "type" "neon_fp_mla_s<q>")]
 759 )
 760
 761 (define_insn "fmsub<VH:mode>4_intrinsic"
 762  [(set (match_operand:VH 0 "register_operand" "=w")
 763    (fma:VH
 764     (neg:VH (match_operand:VH 1 "register_operand" "w"))
 765     (match_operand:VH 2 "register_operand" "w")
 766     (match_operand:VH 3 "register_operand" "0")))]
 767  "TARGET_NEON_FP16INST"
 768  "vfms.<V_if_elem>\\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 769  [(set_attr "type" "neon_fp_mla_s<q>")]
 770 )
 771
 772 (define_insn "neon_vrint<NEON_VRINT:nvrint_variant><VCVTF:mode>"
 773   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
 774         (unspec:VCVTF [(match_operand:VCVTF 1
 775                          "s_register_operand" "w")]
 776                 NEON_VRINT))]
 777   "TARGET_NEON && TARGET_VFP5"
 778   "vrint<nvrint_variant>.f32\\t%<V_reg>0, %<V_reg>1"
 779   [(set_attr "type" "neon_fp_round_<V_elem_ch><q>")]
 780 )
 781
 782 (define_insn "neon_vcvt<NEON_VCVT:nvrint_variant><su_optab><VCVTF:mode><v_cmp_result>"
 783   [(set (match_operand:<V_cmp_result> 0 "register_operand" "=w")
 784         (FIXUORS:<V_cmp_result> (unspec:VCVTF
 785                                [(match_operand:VCVTF 1 "register_operand" "w")]
 786                                NEON_VCVT)))]
 787   "TARGET_NEON && TARGET_VFP5"
 788   "vcvt<nvrint_variant>.<su>32.f32\\t%<V_reg>0, %<V_reg>1"
 789   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")
 790    (set_attr "predicable" "no")]
 791 )
 792
 793 (define_insn "ior<mode>3"
 794   [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
 795         (ior:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
 796                  (match_operand:VDQ 2 "neon_logic_op2" "w,Dl")))]
 797   "TARGET_NEON"
 798 {
 799   switch (which_alternative)
 800     {
 801     case 0: return "vorr\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
 802     case 1: return neon_output_logic_immediate ("vorr", &operands[2],
 803                      <MODE>mode, 0, VALID_NEON_QREG_MODE (<MODE>mode));
 804     default: gcc_unreachable ();
 805     }
 806 }
 807   [(set_attr "type" "neon_logic<q>")]
 808 )
 809
 810 ;; The concrete forms of the Neon immediate-logic instructions are vbic and
 811 ;; vorr. We support the pseudo-instruction vand instead, because that
 812 ;; corresponds to the canonical form the middle-end expects to use for
 813 ;; immediate bitwise-ANDs.
 814
 815 (define_insn "and<mode>3"
 816   [(set (match_operand:VDQ 0 "s_register_operand" "=w,w")
 817         (and:VDQ (match_operand:VDQ 1 "s_register_operand" "w,0")
 818                  (match_operand:VDQ 2 "neon_inv_logic_op2" "w,DL")))]
 819   "TARGET_NEON"
 820 {
 821   switch (which_alternative)
 822     {
 823     case 0: return "vand\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
 824     case 1: return neon_output_logic_immediate ("vand", &operands[2],
 825                      <MODE>mode, 1, VALID_NEON_QREG_MODE (<MODE>mode));
 826     default: gcc_unreachable ();
 827     }
 828 }
 829   [(set_attr "type" "neon_logic<q>")]
 830 )
 831
 832 (define_insn "orn<mode>3_neon"
 833   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
 834         (ior:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
 835                  (match_operand:VDQ 1 "s_register_operand" "w")))]
 836   "TARGET_NEON"
 837   "vorn\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 838   [(set_attr "type" "neon_logic<q>")]
 839 )
 840
 841 (define_insn "bic<mode>3_neon"
 842   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
 843         (and:VDQ (not:VDQ (match_operand:VDQ 2 "s_register_operand" "w"))
 844                  (match_operand:VDQ 1 "s_register_operand" "w")))]
 845   "TARGET_NEON"
 846   "vbic\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 847   [(set_attr "type" "neon_logic<q>")]
 848 )
 849
 850 (define_insn "xor<mode>3"
 851   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
 852         (xor:VDQ (match_operand:VDQ 1 "s_register_operand" "w")
 853                  (match_operand:VDQ 2 "s_register_operand" "w")))]
 854   "TARGET_NEON"
 855   "veor\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 856   [(set_attr "type" "neon_logic<q>")]
 857 )
 858
 859 (define_insn "one_cmpl<mode>2"
 860   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
 861         (not:VDQ (match_operand:VDQ 1 "s_register_operand" "w")))]
 862   "TARGET_NEON"
 863   "vmvn\t%<V_reg>0, %<V_reg>1"
 864   [(set_attr "type" "neon_move<q>")]
 865 )
 866
 867 (define_insn "abs<mode>2"
 868   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
 869         (abs:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
 870   "TARGET_NEON"
 871   "vabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
 872   [(set (attr "type")
 873       (if_then_else (match_test "<Is_float_mode>")
 874                     (const_string "neon_fp_abs_s<q>")
 875                     (const_string "neon_abs<q>")))]
 876 )
 877
 878 (define_insn "neg<mode>2"
 879   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
 880         (neg:VDQW (match_operand:VDQW 1 "s_register_operand" "w")))]
 881   "TARGET_NEON"
 882   "vneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
 883   [(set (attr "type")
 884       (if_then_else (match_test "<Is_float_mode>")
 885                     (const_string "neon_fp_neg_s<q>")
 886                     (const_string "neon_neg<q>")))]
 887 )
 888
 889 (define_insn "negdi2_neon"
 890   [(set (match_operand:DI 0 "s_register_operand"         "=&w, w,r,&r")
 891         (neg:DI (match_operand:DI 1 "s_register_operand" "  w, w,0, r")))
 892    (clobber (match_scratch:DI 2                          "= X,&w,X, X"))
 893    (clobber (reg:CC CC_REGNUM))]
 894   "TARGET_NEON"
 895   "#"
 896   [(set_attr "length" "8")
 897    (set_attr "type" "multiple")]
 898 )
 899
 900 ; Split negdi2_neon for vfp registers
 901 (define_split
 902   [(set (match_operand:DI 0 "s_register_operand" "")
 903         (neg:DI (match_operand:DI 1 "s_register_operand" "")))
 904    (clobber (match_scratch:DI 2 ""))
 905    (clobber (reg:CC CC_REGNUM))]
 906   "TARGET_NEON && reload_completed && IS_VFP_REGNUM (REGNO (operands[0]))"
 907   [(set (match_dup 2) (const_int 0))
 908    (parallel [(set (match_dup 0) (minus:DI (match_dup 2) (match_dup 1)))
 909               (clobber (reg:CC CC_REGNUM))])]
 910   {
 911     if (!REG_P (operands[2]))
 912       operands[2] = operands[0];
 913   }
 914 )
 915
 916 ; Split negdi2_neon for core registers
 917 (define_split
 918   [(set (match_operand:DI 0 "s_register_operand" "")
 919         (neg:DI (match_operand:DI 1 "s_register_operand" "")))
 920    (clobber (match_scratch:DI 2 ""))
 921    (clobber (reg:CC CC_REGNUM))]
 922   "TARGET_32BIT && reload_completed
 923    && arm_general_register_operand (operands[0], DImode)"
 924   [(parallel [(set (match_dup 0) (neg:DI (match_dup 1)))
 925               (clobber (reg:CC CC_REGNUM))])]
 926   ""
 927 )
 928
 929 (define_insn "<absneg_str><mode>2"
 930   [(set (match_operand:VH 0 "s_register_operand" "=w")
 931     (ABSNEG:VH (match_operand:VH 1 "s_register_operand" "w")))]
 932  "TARGET_NEON_FP16INST"
 933  "v<absneg_str>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
 934  [(set_attr "type" "neon_abs<q>")]
 935 )
 936
 937 (define_expand "neon_v<absneg_str><mode>"
 938  [(set
 939    (match_operand:VH 0 "s_register_operand")
 940    (ABSNEG:VH (match_operand:VH 1 "s_register_operand")))]
 941  "TARGET_NEON_FP16INST"
 942 {
 943   emit_insn (gen_<absneg_str><mode>2 (operands[0], operands[1]));
 944   DONE;
 945 })
 946
 947 (define_insn "neon_v<fp16_rnd_str><mode>"
 948   [(set (match_operand:VH 0 "s_register_operand" "=w")
 949     (unspec:VH
 950      [(match_operand:VH 1 "s_register_operand" "w")]
 951      FP16_RND))]
 952  "TARGET_NEON_FP16INST"
 953  "<fp16_rnd_insn>.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
 954  [(set_attr "type" "neon_fp_round_s<q>")]
 955 )
 956
 957 (define_insn "neon_vrsqrte<mode>"
 958   [(set (match_operand:VH 0 "s_register_operand" "=w")
 959     (unspec:VH
 960      [(match_operand:VH 1 "s_register_operand" "w")]
 961      UNSPEC_VRSQRTE))]
 962   "TARGET_NEON_FP16INST"
 963   "vrsqrte.f16\t%<V_reg>0, %<V_reg>1"
 964  [(set_attr "type" "neon_fp_rsqrte_s<q>")]
 965 )
 966
 967 (define_insn "*umin<mode>3_neon"
 968   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
 969         (umin:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
 970                     (match_operand:VDQIW 2 "s_register_operand" "w")))]
 971   "TARGET_NEON"
 972   "vmin.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 973   [(set_attr "type" "neon_minmax<q>")]
 974 )
 975
 976 (define_insn "*umax<mode>3_neon"
 977   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
 978         (umax:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
 979                     (match_operand:VDQIW 2 "s_register_operand" "w")))]
 980   "TARGET_NEON"
 981   "vmax.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 982   [(set_attr "type" "neon_minmax<q>")]
 983 )
 984
 985 (define_insn "*smin<mode>3_neon"
 986   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
 987         (smin:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
 988                    (match_operand:VDQW 2 "s_register_operand" "w")))]
 989   "TARGET_NEON"
 990   "vmin.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
 991   [(set (attr "type")
 992       (if_then_else (match_test "<Is_float_mode>")
 993                     (const_string "neon_fp_minmax_s<q>")
 994                     (const_string "neon_minmax<q>")))]
 995 )
 996
 997 (define_insn "*smax<mode>3_neon"
 998   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
 999         (smax:VDQW (match_operand:VDQW 1 "s_register_operand" "w")
1000                    (match_operand:VDQW 2 "s_register_operand" "w")))]
1001   "TARGET_NEON"
1002   "vmax.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1003   [(set (attr "type")
1004       (if_then_else (match_test "<Is_float_mode>")
1005                     (const_string "neon_fp_minmax_s<q>")
1006                     (const_string "neon_minmax<q>")))]
1007 )
1008
1009 ; TODO: V2DI shifts are current disabled because there are bugs in the
1010 ; generic vectorizer code.  It ends up creating a V2DI constructor with
1011 ; SImode elements.
1012
1013 (define_insn "vashl<mode>3"
1014   [(set (match_operand:VDQIW 0 "s_register_operand" "=w,w")
1015         (ashift:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w,w")
1016                       (match_operand:VDQIW 2 "imm_lshift_or_reg_neon" "w,Dm")))]
1017   "TARGET_NEON"
1018   {
1019     switch (which_alternative)
1020       {
1021         case 0: return "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2";
1022         case 1: return neon_output_shift_immediate ("vshl", 'i', &operands[2],
1023                                                     <MODE>mode,
1024                                                     VALID_NEON_QREG_MODE (<MODE>mode),
1025                                                     true);
1026         default: gcc_unreachable ();
1027       }
1028   }
1029   [(set_attr "type" "neon_shift_reg<q>, neon_shift_imm<q>")]
1030 )
1031
1032 (define_insn "vashr<mode>3_imm"
1033   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1034         (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1035                         (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
1036   "TARGET_NEON"
1037   {
1038     return neon_output_shift_immediate ("vshr", 's', &operands[2],
1039                                         <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1040                                         false);
1041   }
1042   [(set_attr "type" "neon_shift_imm<q>")]
1043 )
1044
1045 (define_insn "vlshr<mode>3_imm"
1046   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1047         (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")
1048                         (match_operand:VDQIW 2 "imm_for_neon_rshift_operand" "Dm")))]
1049   "TARGET_NEON"
1050   {
1051     return neon_output_shift_immediate ("vshr", 'u', &operands[2],
1052                                         <MODE>mode, VALID_NEON_QREG_MODE (<MODE>mode),
1053                                         false);
1054   }
1055   [(set_attr "type" "neon_shift_imm<q>")]
1056 )
1057
1058 ; Used for implementing logical shift-right, which is a left-shift by a negative
1059 ; amount, with signed operands. This is essentially the same as ashl<mode>3
1060 ; above, but using an unspec in case GCC tries anything tricky with negative
1061 ; shift amounts.
1062
1063 (define_insn "ashl<mode>3_signed"
1064   [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1065         (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1066                       (match_operand:VDQI 2 "s_register_operand" "w")]
1067                      UNSPEC_ASHIFT_SIGNED))]
1068   "TARGET_NEON"
1069   "vshl.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1070   [(set_attr "type" "neon_shift_reg<q>")]
1071 )
1072
1073 ; Used for implementing logical shift-right, which is a left-shift by a negative
1074 ; amount, with unsigned operands.
1075
1076 (define_insn "ashl<mode>3_unsigned"
1077   [(set (match_operand:VDQI 0 "s_register_operand" "=w")
1078         (unspec:VDQI [(match_operand:VDQI 1 "s_register_operand" "w")
1079                       (match_operand:VDQI 2 "s_register_operand" "w")]
1080                      UNSPEC_ASHIFT_UNSIGNED))]
1081   "TARGET_NEON"
1082   "vshl.<V_u_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1083   [(set_attr "type" "neon_shift_reg<q>")]
1084 )
1085
1086 (define_expand "vashr<mode>3"
1087   [(set (match_operand:VDQIW 0 "s_register_operand")
1088         (ashiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
1089                         (match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
1090   "TARGET_NEON"
1091 {
1092   if (s_register_operand (operands[2], <MODE>mode))
1093     {
1094       rtx neg = gen_reg_rtx (<MODE>mode);
1095       emit_insn (gen_neg<mode>2 (neg, operands[2]));
1096       emit_insn (gen_ashl<mode>3_signed (operands[0], operands[1], neg));
1097     }
1098   else
1099     emit_insn (gen_vashr<mode>3_imm (operands[0], operands[1], operands[2]));
1100   DONE;
1101 })
1102
1103 (define_expand "vlshr<mode>3"
1104   [(set (match_operand:VDQIW 0 "s_register_operand")
1105         (lshiftrt:VDQIW (match_operand:VDQIW 1 "s_register_operand")
1106                         (match_operand:VDQIW 2 "imm_rshift_or_reg_neon")))]
1107   "TARGET_NEON"
1108 {
1109   if (s_register_operand (operands[2], <MODE>mode))
1110     {
1111       rtx neg = gen_reg_rtx (<MODE>mode);
1112       emit_insn (gen_neg<mode>2 (neg, operands[2]));
1113       emit_insn (gen_ashl<mode>3_unsigned (operands[0], operands[1], neg));
1114     }
1115   else
1116     emit_insn (gen_vlshr<mode>3_imm (operands[0], operands[1], operands[2]));
1117   DONE;
1118 })
1119
1120 ;; 64-bit shifts
1121
1122 ;; This pattern loads a 32-bit shift count into a 64-bit NEON register,
1123 ;; leaving the upper half uninitalized.  This is OK since the shift
1124 ;; instruction only looks at the low 8 bits anyway.  To avoid confusing
1125 ;; data flow analysis however, we pretend the full register is set
1126 ;; using an unspec.
1127 (define_insn "neon_load_count"
1128   [(set (match_operand:DI 0 "s_register_operand" "=w,w")
1129         (unspec:DI [(match_operand:SI 1 "nonimmediate_operand" "Um,r")]
1130                    UNSPEC_LOAD_COUNT))]
1131   "TARGET_NEON"
1132   "@
1133    vld1.32\t{%P0[0]}, %A1
1134    vmov.32\t%P0[0], %1"
1135   [(set_attr "type" "neon_load1_1reg,neon_from_gp")]
1136 )
1137
1138 ;; Widening operations
1139
1140 (define_expand "widen_ssum<mode>3"
1141   [(set (match_operand:<V_double_width> 0 "s_register_operand")
1142         (plus:<V_double_width>
1143          (sign_extend:<V_double_width>
1144           (match_operand:VQI 1 "s_register_operand"))
1145          (match_operand:<V_double_width> 2 "s_register_operand")))]
1146   "TARGET_NEON"
1147   {
1148     machine_mode mode = GET_MODE (operands[1]);
1149     rtx p1, p2;
1150
1151     p1  = arm_simd_vect_par_cnst_half (mode, false);
1152     p2  = arm_simd_vect_par_cnst_half (mode, true);
1153
1154     if (operands[0] != operands[2])
1155       emit_move_insn (operands[0], operands[2]);
1156
1157     emit_insn (gen_vec_sel_widen_ssum_lo<mode><V_half>3 (operands[0],
1158                                                          operands[1],
1159                                                          p1,
1160                                                          operands[0]));
1161     emit_insn (gen_vec_sel_widen_ssum_hi<mode><V_half>3 (operands[0],
1162                                                          operands[1],
1163                                                          p2,
1164                                                          operands[0]));
1165     DONE;
1166   }
1167 )
1168
1169 (define_insn "vec_sel_widen_ssum_lo<mode><V_half>3"
1170   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1171         (plus:<V_double_width>
1172          (sign_extend:<V_double_width>
1173           (vec_select:<V_HALF>
1174            (match_operand:VQI 1 "s_register_operand" "%w")
1175            (match_operand:VQI 2 "vect_par_constant_low" "")))
1176          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1177   "TARGET_NEON"
1178 {
1179   return BYTES_BIG_ENDIAN ?  "vaddw.<V_s_elem>\t%q0, %q3, %f1" :
1180     "vaddw.<V_s_elem>\t%q0, %q3, %e1";
1181 }
1182   [(set_attr "type" "neon_add_widen")])
1183
1184 (define_insn "vec_sel_widen_ssum_hi<mode><V_half>3"
1185   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1186         (plus:<V_double_width>
1187          (sign_extend:<V_double_width>
1188           (vec_select:<V_HALF>
1189                          (match_operand:VQI 1 "s_register_operand" "%w")
1190                          (match_operand:VQI 2 "vect_par_constant_high" "")))
1191          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1192   "TARGET_NEON"
1193 {
1194   return BYTES_BIG_ENDIAN ?  "vaddw.<V_s_elem>\t%q0, %q3, %e1" :
1195     "vaddw.<V_s_elem>\t%q0, %q3, %f1";
1196 }
1197   [(set_attr "type" "neon_add_widen")])
1198
1199 (define_insn "widen_ssum<mode>3"
1200   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1201         (plus:<V_widen>
1202          (sign_extend:<V_widen>
1203           (match_operand:VW 1 "s_register_operand" "%w"))
1204          (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1205   "TARGET_NEON"
1206   "vaddw.<V_s_elem>\t%q0, %q2, %P1"
1207   [(set_attr "type" "neon_add_widen")]
1208 )
1209
1210 (define_expand "widen_usum<mode>3"
1211   [(set (match_operand:<V_double_width> 0 "s_register_operand")
1212         (plus:<V_double_width>
1213          (zero_extend:<V_double_width>
1214           (match_operand:VQI 1 "s_register_operand"))
1215          (match_operand:<V_double_width> 2 "s_register_operand")))]
1216   "TARGET_NEON"
1217   {
1218     machine_mode mode = GET_MODE (operands[1]);
1219     rtx p1, p2;
1220
1221     p1  = arm_simd_vect_par_cnst_half (mode, false);
1222     p2  = arm_simd_vect_par_cnst_half (mode, true);
1223
1224     if (operands[0] != operands[2])
1225       emit_move_insn (operands[0], operands[2]);
1226
1227     emit_insn (gen_vec_sel_widen_usum_lo<mode><V_half>3 (operands[0],
1228                                                          operands[1],
1229                                                          p1,
1230                                                          operands[0]));
1231     emit_insn (gen_vec_sel_widen_usum_hi<mode><V_half>3 (operands[0],
1232                                                          operands[1],
1233                                                          p2,
1234                                                          operands[0]));
1235     DONE;
1236   }
1237 )
1238
1239 (define_insn "vec_sel_widen_usum_lo<mode><V_half>3"
1240   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1241         (plus:<V_double_width>
1242          (zero_extend:<V_double_width>
1243           (vec_select:<V_HALF>
1244            (match_operand:VQI 1 "s_register_operand" "%w")
1245            (match_operand:VQI 2 "vect_par_constant_low" "")))
1246          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1247   "TARGET_NEON"
1248 {
1249   return BYTES_BIG_ENDIAN ?  "vaddw.<V_u_elem>\t%q0, %q3, %f1" :
1250     "vaddw.<V_u_elem>\t%q0, %q3, %e1";
1251 }
1252   [(set_attr "type" "neon_add_widen")])
1253
1254 (define_insn "vec_sel_widen_usum_hi<mode><V_half>3"
1255   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
1256         (plus:<V_double_width>
1257          (zero_extend:<V_double_width>
1258           (vec_select:<V_HALF>
1259                          (match_operand:VQI 1 "s_register_operand" "%w")
1260                          (match_operand:VQI 2 "vect_par_constant_high" "")))
1261          (match_operand:<V_double_width> 3 "s_register_operand" "0")))]
1262   "TARGET_NEON"
1263 {
1264  return BYTES_BIG_ENDIAN ?  "vaddw.<V_u_elem>\t%q0, %q3, %e1" :
1265     "vaddw.<V_u_elem>\t%q0, %q3, %f1";
1266 }
1267   [(set_attr "type" "neon_add_widen")])
1268
1269 (define_insn "widen_usum<mode>3"
1270   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1271         (plus:<V_widen> (zero_extend:<V_widen>
1272                           (match_operand:VW 1 "s_register_operand" "%w"))
1273                         (match_operand:<V_widen> 2 "s_register_operand" "w")))]
1274   "TARGET_NEON"
1275   "vaddw.<V_u_elem>\t%q0, %q2, %P1"
1276   [(set_attr "type" "neon_add_widen")]
1277 )
1278
1279 ;; Helpers for quad-word reduction operations
1280
1281 ; Add (or smin, smax...) the low N/2 elements of the N-element vector
1282 ; operand[1] to the high N/2 elements of same. Put the result in operand[0], an
1283 ; N/2-element vector.
1284
1285 (define_insn "quad_halves_<code>v4si"
1286   [(set (match_operand:V2SI 0 "s_register_operand" "=w")
1287         (VQH_OPS:V2SI
1288           (vec_select:V2SI (match_operand:V4SI 1 "s_register_operand" "w")
1289                            (parallel [(const_int 0) (const_int 1)]))
1290           (vec_select:V2SI (match_dup 1)
1291                            (parallel [(const_int 2) (const_int 3)]))))]
1292   "TARGET_NEON"
1293   "<VQH_mnem>.<VQH_sign>32\t%P0, %e1, %f1"
1294   [(set_attr "vqh_mnem" "<VQH_mnem>")
1295    (set_attr "type" "neon_reduc_<VQH_type>_q")]
1296 )
1297
1298 (define_insn "quad_halves_<code>v4sf"
1299   [(set (match_operand:V2SF 0 "s_register_operand" "=w")
1300         (VQHS_OPS:V2SF
1301           (vec_select:V2SF (match_operand:V4SF 1 "s_register_operand" "w")
1302                            (parallel [(const_int 0) (const_int 1)]))
1303           (vec_select:V2SF (match_dup 1)
1304                            (parallel [(const_int 2) (const_int 3)]))))]
1305   "TARGET_NEON && flag_unsafe_math_optimizations"
1306   "<VQH_mnem>.f32\t%P0, %e1, %f1"
1307   [(set_attr "vqh_mnem" "<VQH_mnem>")
1308    (set_attr "type" "neon_fp_reduc_<VQH_type>_s_q")]
1309 )
1310
1311 (define_insn "quad_halves_<code>v8hi"
1312   [(set (match_operand:V4HI 0 "s_register_operand" "+w")
1313         (VQH_OPS:V4HI
1314           (vec_select:V4HI (match_operand:V8HI 1 "s_register_operand" "w")
1315                            (parallel [(const_int 0) (const_int 1)
1316                                       (const_int 2) (const_int 3)]))
1317           (vec_select:V4HI (match_dup 1)
1318                            (parallel [(const_int 4) (const_int 5)
1319                                       (const_int 6) (const_int 7)]))))]
1320   "TARGET_NEON"
1321   "<VQH_mnem>.<VQH_sign>16\t%P0, %e1, %f1"
1322   [(set_attr "vqh_mnem" "<VQH_mnem>")
1323    (set_attr "type" "neon_reduc_<VQH_type>_q")]
1324 )
1325
1326 (define_insn "quad_halves_<code>v16qi"
1327   [(set (match_operand:V8QI 0 "s_register_operand" "+w")
1328         (VQH_OPS:V8QI
1329           (vec_select:V8QI (match_operand:V16QI 1 "s_register_operand" "w")
1330                            (parallel [(const_int 0) (const_int 1)
1331                                       (const_int 2) (const_int 3)
1332                                       (const_int 4) (const_int 5)
1333                                       (const_int 6) (const_int 7)]))
1334           (vec_select:V8QI (match_dup 1)
1335                            (parallel [(const_int 8) (const_int 9)
1336                                       (const_int 10) (const_int 11)
1337                                       (const_int 12) (const_int 13)
1338                                       (const_int 14) (const_int 15)]))))]
1339   "TARGET_NEON"
1340   "<VQH_mnem>.<VQH_sign>8\t%P0, %e1, %f1"
1341   [(set_attr "vqh_mnem" "<VQH_mnem>")
1342    (set_attr "type" "neon_reduc_<VQH_type>_q")]
1343 )
1344
1345 (define_expand "move_hi_quad_<mode>"
1346  [(match_operand:ANY128 0 "s_register_operand")
1347   (match_operand:<V_HALF> 1 "s_register_operand")]
1348  "TARGET_NEON"
1349 {
1350   emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0], <MODE>mode,
1351                                        GET_MODE_SIZE (<V_HALF>mode)),
1352                   operands[1]);
1353   DONE;
1354 })
1355
1356 (define_expand "move_lo_quad_<mode>"
1357  [(match_operand:ANY128 0 "s_register_operand")
1358   (match_operand:<V_HALF> 1 "s_register_operand")]
1359  "TARGET_NEON"
1360 {
1361   emit_move_insn (simplify_gen_subreg (<V_HALF>mode, operands[0],
1362                                        <MODE>mode, 0),
1363                   operands[1]);
1364   DONE;
1365 })
1366
1367 ;; Reduction operations
1368
1369 (define_expand "reduc_plus_scal_<mode>"
1370   [(match_operand:<V_elem> 0 "nonimmediate_operand")
1371    (match_operand:VD 1 "s_register_operand")]
1372   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1373 {
1374   rtx vec = gen_reg_rtx (<MODE>mode);
1375   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1376                         &gen_neon_vpadd_internal<mode>);
1377   /* The same result is actually computed into every element.  */
1378   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1379   DONE;
1380 })
1381
1382 (define_expand "reduc_plus_scal_<mode>"
1383   [(match_operand:<V_elem> 0 "nonimmediate_operand")
1384    (match_operand:VQ 1 "s_register_operand")]
1385   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1386    && !BYTES_BIG_ENDIAN"
1387 {
1388   rtx step1 = gen_reg_rtx (<V_HALF>mode);
1389
1390   emit_insn (gen_quad_halves_plus<mode> (step1, operands[1]));
1391   emit_insn (gen_reduc_plus_scal_<V_half> (operands[0], step1));
1392
1393   DONE;
1394 })
1395
1396 (define_expand "reduc_plus_scal_v2di"
1397   [(match_operand:DI 0 "nonimmediate_operand")
1398    (match_operand:V2DI 1 "s_register_operand")]
1399   "TARGET_NEON && !BYTES_BIG_ENDIAN"
1400 {
1401   rtx vec = gen_reg_rtx (V2DImode);
1402
1403   emit_insn (gen_arm_reduc_plus_internal_v2di (vec, operands[1]));
1404   emit_insn (gen_vec_extractv2didi (operands[0], vec, const0_rtx));
1405
1406   DONE;
1407 })
1408
1409 (define_insn "arm_reduc_plus_internal_v2di"
1410   [(set (match_operand:V2DI 0 "s_register_operand" "=w")
1411         (unspec:V2DI [(match_operand:V2DI 1 "s_register_operand" "w")]
1412                      UNSPEC_VPADD))]
1413   "TARGET_NEON && !BYTES_BIG_ENDIAN"
1414   "vadd.i64\t%e0, %e1, %f1"
1415   [(set_attr "type" "neon_add_q")]
1416 )
1417
1418 (define_expand "reduc_smin_scal_<mode>"
1419   [(match_operand:<V_elem> 0 "nonimmediate_operand")
1420    (match_operand:VD 1 "s_register_operand")]
1421   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1422 {
1423   rtx vec = gen_reg_rtx (<MODE>mode);
1424
1425   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1426                         &gen_neon_vpsmin<mode>);
1427   /* The result is computed into every element of the vector.  */
1428   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1429   DONE;
1430 })
1431
1432 (define_expand "reduc_smin_scal_<mode>"
1433   [(match_operand:<V_elem> 0 "nonimmediate_operand")
1434    (match_operand:VQ 1 "s_register_operand")]
1435   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1436    && !BYTES_BIG_ENDIAN"
1437 {
1438   rtx step1 = gen_reg_rtx (<V_HALF>mode);
1439
1440   emit_insn (gen_quad_halves_smin<mode> (step1, operands[1]));
1441   emit_insn (gen_reduc_smin_scal_<V_half> (operands[0], step1));
1442
1443   DONE;
1444 })
1445
1446 (define_expand "reduc_smax_scal_<mode>"
1447   [(match_operand:<V_elem> 0 "nonimmediate_operand")
1448    (match_operand:VD 1 "s_register_operand")]
1449   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1450 {
1451   rtx vec = gen_reg_rtx (<MODE>mode);
1452   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1453                         &gen_neon_vpsmax<mode>);
1454   /* The result is computed into every element of the vector.  */
1455   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1456   DONE;
1457 })
1458
1459 (define_expand "reduc_smax_scal_<mode>"
1460   [(match_operand:<V_elem> 0 "nonimmediate_operand")
1461    (match_operand:VQ 1 "s_register_operand")]
1462   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)
1463    && !BYTES_BIG_ENDIAN"
1464 {
1465   rtx step1 = gen_reg_rtx (<V_HALF>mode);
1466
1467   emit_insn (gen_quad_halves_smax<mode> (step1, operands[1]));
1468   emit_insn (gen_reduc_smax_scal_<V_half> (operands[0], step1));
1469
1470   DONE;
1471 })
1472
1473 (define_expand "reduc_umin_scal_<mode>"
1474   [(match_operand:<V_elem> 0 "nonimmediate_operand")
1475    (match_operand:VDI 1 "s_register_operand")]
1476   "TARGET_NEON"
1477 {
1478   rtx vec = gen_reg_rtx (<MODE>mode);
1479   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1480                         &gen_neon_vpumin<mode>);
1481   /* The result is computed into every element of the vector.  */
1482   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1483   DONE;
1484 })
1485
1486 (define_expand "reduc_umin_scal_<mode>"
1487   [(match_operand:<V_elem> 0 "nonimmediate_operand")
1488    (match_operand:VQI 1 "s_register_operand")]
1489   "TARGET_NEON && !BYTES_BIG_ENDIAN"
1490 {
1491   rtx step1 = gen_reg_rtx (<V_HALF>mode);
1492
1493   emit_insn (gen_quad_halves_umin<mode> (step1, operands[1]));
1494   emit_insn (gen_reduc_umin_scal_<V_half> (operands[0], step1));
1495
1496   DONE;
1497 })
1498
1499 (define_expand "reduc_umax_scal_<mode>"
1500   [(match_operand:<V_elem> 0 "nonimmediate_operand")
1501    (match_operand:VDI 1 "s_register_operand")]
1502   "TARGET_NEON"
1503 {
1504   rtx vec = gen_reg_rtx (<MODE>mode);
1505   neon_pairwise_reduce (vec, operands[1], <MODE>mode,
1506                         &gen_neon_vpumax<mode>);
1507   /* The result is computed into every element of the vector.  */
1508   emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], vec, const0_rtx));
1509   DONE;
1510 })
1511
1512 (define_expand "reduc_umax_scal_<mode>"
1513   [(match_operand:<V_elem> 0 "nonimmediate_operand")
1514    (match_operand:VQI 1 "s_register_operand")]
1515   "TARGET_NEON && !BYTES_BIG_ENDIAN"
1516 {
1517   rtx step1 = gen_reg_rtx (<V_HALF>mode);
1518
1519   emit_insn (gen_quad_halves_umax<mode> (step1, operands[1]));
1520   emit_insn (gen_reduc_umax_scal_<V_half> (operands[0], step1));
1521
1522   DONE;
1523 })
1524
1525 (define_insn "neon_vpadd_internal<mode>"
1526   [(set (match_operand:VD 0 "s_register_operand" "=w")
1527         (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1528                     (match_operand:VD 2 "s_register_operand" "w")]
1529                    UNSPEC_VPADD))]
1530   "TARGET_NEON"
1531   "vpadd.<V_if_elem>\t%P0, %P1, %P2"
1532   ;; Assume this schedules like vadd.
1533   [(set (attr "type")
1534       (if_then_else (match_test "<Is_float_mode>")
1535                     (const_string "neon_fp_reduc_add_s<q>")
1536                     (const_string "neon_reduc_add<q>")))]
1537 )
1538
1539 (define_insn "neon_vpaddv4hf"
1540  [(set
1541    (match_operand:V4HF 0 "s_register_operand" "=w")
1542    (unspec:V4HF [(match_operand:V4HF 1 "s_register_operand" "w")
1543                  (match_operand:V4HF 2 "s_register_operand" "w")]
1544     UNSPEC_VPADD))]
1545  "TARGET_NEON_FP16INST"
1546  "vpadd.f16\t%P0, %P1, %P2"
1547  [(set_attr "type" "neon_reduc_add")]
1548 )
1549
1550 (define_insn "neon_vpsmin<mode>"
1551   [(set (match_operand:VD 0 "s_register_operand" "=w")
1552         (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1553                     (match_operand:VD 2 "s_register_operand" "w")]
1554                    UNSPEC_VPSMIN))]
1555   "TARGET_NEON"
1556   "vpmin.<V_s_elem>\t%P0, %P1, %P2"
1557   [(set (attr "type")
1558       (if_then_else (match_test "<Is_float_mode>")
1559                     (const_string "neon_fp_reduc_minmax_s<q>")
1560                     (const_string "neon_reduc_minmax<q>")))]
1561 )
1562
1563 (define_insn "neon_vpsmax<mode>"
1564   [(set (match_operand:VD 0 "s_register_operand" "=w")
1565         (unspec:VD [(match_operand:VD 1 "s_register_operand" "w")
1566                     (match_operand:VD 2 "s_register_operand" "w")]
1567                    UNSPEC_VPSMAX))]
1568   "TARGET_NEON"
1569   "vpmax.<V_s_elem>\t%P0, %P1, %P2"
1570   [(set (attr "type")
1571       (if_then_else (match_test "<Is_float_mode>")
1572                     (const_string "neon_fp_reduc_minmax_s<q>")
1573                     (const_string "neon_reduc_minmax<q>")))]
1574 )
1575
1576 (define_insn "neon_vpumin<mode>"
1577   [(set (match_operand:VDI 0 "s_register_operand" "=w")
1578         (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1579                      (match_operand:VDI 2 "s_register_operand" "w")]
1580                    UNSPEC_VPUMIN))]
1581   "TARGET_NEON"
1582   "vpmin.<V_u_elem>\t%P0, %P1, %P2"
1583   [(set_attr "type" "neon_reduc_minmax<q>")]
1584 )
1585
1586 (define_insn "neon_vpumax<mode>"
1587   [(set (match_operand:VDI 0 "s_register_operand" "=w")
1588         (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
1589                      (match_operand:VDI 2 "s_register_operand" "w")]
1590                    UNSPEC_VPUMAX))]
1591   "TARGET_NEON"
1592   "vpmax.<V_u_elem>\t%P0, %P1, %P2"
1593   [(set_attr "type" "neon_reduc_minmax<q>")]
1594 )
1595
1596 ;; Saturating arithmetic
1597
1598 ; NOTE: Neon supports many more saturating variants of instructions than the
1599 ; following, but these are all GCC currently understands.
1600 ; FIXME: Actually, GCC doesn't know how to create saturating add/sub by itself
1601 ; yet either, although these patterns may be used by intrinsics when they're
1602 ; added.
1603
1604 (define_insn "*ss_add<mode>_neon"
1605   [(set (match_operand:VD 0 "s_register_operand" "=w")
1606        (ss_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1607                    (match_operand:VD 2 "s_register_operand" "w")))]
1608   "TARGET_NEON"
1609   "vqadd.<V_s_elem>\t%P0, %P1, %P2"
1610   [(set_attr "type" "neon_qadd<q>")]
1611 )
1612
1613 (define_insn "*us_add<mode>_neon"
1614   [(set (match_operand:VD 0 "s_register_operand" "=w")
1615        (us_plus:VD (match_operand:VD 1 "s_register_operand" "w")
1616                    (match_operand:VD 2 "s_register_operand" "w")))]
1617   "TARGET_NEON"
1618   "vqadd.<V_u_elem>\t%P0, %P1, %P2"
1619   [(set_attr "type" "neon_qadd<q>")]
1620 )
1621
1622 (define_insn "*ss_sub<mode>_neon"
1623   [(set (match_operand:VD 0 "s_register_operand" "=w")
1624        (ss_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1625                     (match_operand:VD 2 "s_register_operand" "w")))]
1626   "TARGET_NEON"
1627   "vqsub.<V_s_elem>\t%P0, %P1, %P2"
1628   [(set_attr "type" "neon_qsub<q>")]
1629 )
1630
1631 (define_insn "*us_sub<mode>_neon"
1632   [(set (match_operand:VD 0 "s_register_operand" "=w")
1633        (us_minus:VD (match_operand:VD 1 "s_register_operand" "w")
1634                     (match_operand:VD 2 "s_register_operand" "w")))]
1635   "TARGET_NEON"
1636   "vqsub.<V_u_elem>\t%P0, %P1, %P2"
1637   [(set_attr "type" "neon_qsub<q>")]
1638 )
1639
1640 ;; Conditional instructions.  These are comparisons with conditional moves for
1641 ;; vectors.  They perform the assignment:
1642 ;;
1643 ;;     Vop0 = (Vop4 <op3> Vop5) ? Vop1 : Vop2;
1644 ;;
1645 ;; where op3 is <, <=, ==, !=, >= or >.  Operations are performed
1646 ;; element-wise.
1647
1648 (define_expand "vcond<mode><mode>"
1649   [(set (match_operand:VDQW 0 "s_register_operand")
1650         (if_then_else:VDQW
1651           (match_operator 3 "comparison_operator"
1652             [(match_operand:VDQW 4 "s_register_operand")
1653              (match_operand:VDQW 5 "nonmemory_operand")])
1654           (match_operand:VDQW 1 "s_register_operand")
1655           (match_operand:VDQW 2 "s_register_operand")))]
1656   "TARGET_NEON && (!<Is_float_mode> || flag_unsafe_math_optimizations)"
1657 {
1658   int inverse = 0;
1659   int use_zero_form = 0;
1660   int swap_bsl_operands = 0;
1661   rtx mask = gen_reg_rtx (<V_cmp_result>mode);
1662   rtx tmp = gen_reg_rtx (<V_cmp_result>mode);
1663
1664   rtx (*base_comparison) (rtx, rtx, rtx);
1665   rtx (*complimentary_comparison) (rtx, rtx, rtx);
1666
1667   switch (GET_CODE (operands[3]))
1668     {
1669     case GE:
1670     case GT:
1671     case LE:
1672     case LT:
1673     case EQ:
1674       if (operands[5] == CONST0_RTX (<MODE>mode))
1675         {
1676           use_zero_form = 1;
1677           break;
1678         }
1679       /* Fall through.  */
1680     default:
1681       if (!REG_P (operands[5]))
1682         operands[5] = force_reg (<MODE>mode, operands[5]);
1683     }
1684
1685   switch (GET_CODE (operands[3]))
1686     {
1687     case LT:
1688     case UNLT:
1689       inverse = 1;
1690       /* Fall through.  */
1691     case GE:
1692     case UNGE:
1693     case ORDERED:
1694     case UNORDERED:
1695       base_comparison = gen_neon_vcge<mode>;
1696       complimentary_comparison = gen_neon_vcgt<mode>;
1697       break;
1698     case LE:
1699     case UNLE:
1700       inverse = 1;
1701       /* Fall through.  */
1702     case GT:
1703     case UNGT:
1704       base_comparison = gen_neon_vcgt<mode>;
1705       complimentary_comparison = gen_neon_vcge<mode>;
1706       break;
1707     case EQ:
1708     case NE:
1709     case UNEQ:
1710       base_comparison = gen_neon_vceq<mode>;
1711       complimentary_comparison = gen_neon_vceq<mode>;
1712       break;
1713     default:
1714       gcc_unreachable ();
1715     }
1716
1717   switch (GET_CODE (operands[3]))
1718     {
1719     case LT:
1720     case LE:
1721     case GT:
1722     case GE:
1723     case EQ:
1724       /* The easy case.  Here we emit one of vcge, vcgt or vceq.
1725          As a LT b <=> b GE a && a LE b <=> b GT a.  Our transformations are:
1726          a GE b -> a GE b
1727          a GT b -> a GT b
1728          a LE b -> b GE a
1729          a LT b -> b GT a
1730          a EQ b -> a EQ b
1731          Note that there also exist direct comparison against 0 forms,
1732          so catch those as a special case.  */
1733       if (use_zero_form)
1734         {
1735           inverse = 0;
1736           switch (GET_CODE (operands[3]))
1737             {
1738             case LT:
1739               base_comparison = gen_neon_vclt<mode>;
1740               break;
1741             case LE:
1742               base_comparison = gen_neon_vcle<mode>;
1743               break;
1744             default:
1745               /* Do nothing, other zero form cases already have the correct
1746                  base_comparison.  */
1747               break;
1748             }
1749         }
1750
1751       if (!inverse)
1752         emit_insn (base_comparison (mask, operands[4], operands[5]));
1753       else
1754         emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1755       break;
1756     case UNLT:
1757     case UNLE:
1758     case UNGT:
1759     case UNGE:
1760     case NE:
1761       /* Vector compare returns false for lanes which are unordered, so if we use
1762          the inverse of the comparison we actually want to emit, then
1763          swap the operands to BSL, we will end up with the correct result.
1764          Note that a NE NaN and NaN NE b are true for all a, b.
1765
1766          Our transformations are:
1767          a GE b -> !(b GT a)
1768          a GT b -> !(b GE a)
1769          a LE b -> !(a GT b)
1770          a LT b -> !(a GE b)
1771          a NE b -> !(a EQ b)  */
1772
1773       if (inverse)
1774         emit_insn (base_comparison (mask, operands[4], operands[5]));
1775       else
1776         emit_insn (complimentary_comparison (mask, operands[5], operands[4]));
1777
1778       swap_bsl_operands = 1;
1779       break;
1780     case UNEQ:
1781       /* We check (a > b ||  b > a).  combining these comparisons give us
1782          true iff !(a != b && a ORDERED b), swapping the operands to BSL
1783          will then give us (a == b ||  a UNORDERED b) as intended.  */
1784
1785       emit_insn (gen_neon_vcgt<mode> (mask, operands[4], operands[5]));
1786       emit_insn (gen_neon_vcgt<mode> (tmp, operands[5], operands[4]));
1787       emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1788       swap_bsl_operands = 1;
1789       break;
1790     case UNORDERED:
1791        /* Operands are ORDERED iff (a > b || b >= a).
1792          Swapping the operands to BSL will give the UNORDERED case.  */
1793      swap_bsl_operands = 1;
1794      /* Fall through.  */
1795     case ORDERED:
1796       emit_insn (gen_neon_vcgt<mode> (tmp, operands[4], operands[5]));
1797       emit_insn (gen_neon_vcge<mode> (mask, operands[5], operands[4]));
1798       emit_insn (gen_ior<v_cmp_result>3 (mask, mask, tmp));
1799       break;
1800     default:
1801       gcc_unreachable ();
1802     }
1803
1804   if (swap_bsl_operands)
1805     emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1806                                     operands[1]));
1807   else
1808     emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1809                                     operands[2]));
1810   DONE;
1811 })
1812
1813 (define_expand "vcondu<mode><mode>"
1814   [(set (match_operand:VDQIW 0 "s_register_operand")
1815         (if_then_else:VDQIW
1816           (match_operator 3 "arm_comparison_operator"
1817             [(match_operand:VDQIW 4 "s_register_operand")
1818              (match_operand:VDQIW 5 "s_register_operand")])
1819           (match_operand:VDQIW 1 "s_register_operand")
1820           (match_operand:VDQIW 2 "s_register_operand")))]
1821   "TARGET_NEON"
1822 {
1823   rtx mask;
1824   int inverse = 0, immediate_zero = 0;
1825
1826   mask = gen_reg_rtx (<V_cmp_result>mode);
1827
1828   if (operands[5] == CONST0_RTX (<MODE>mode))
1829     immediate_zero = 1;
1830   else if (!REG_P (operands[5]))
1831     operands[5] = force_reg (<MODE>mode, operands[5]);
1832
1833   switch (GET_CODE (operands[3]))
1834     {
1835     case GEU:
1836       emit_insn (gen_neon_vcgeu<mode> (mask, operands[4], operands[5]));
1837       break;
1838
1839     case GTU:
1840       emit_insn (gen_neon_vcgtu<mode> (mask, operands[4], operands[5]));
1841       break;
1842
1843     case EQ:
1844       emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1845       break;
1846
1847     case LEU:
1848       if (immediate_zero)
1849         emit_insn (gen_neon_vcle<mode> (mask, operands[4], operands[5]));
1850       else
1851         emit_insn (gen_neon_vcgeu<mode> (mask, operands[5], operands[4]));
1852       break;
1853
1854     case LTU:
1855       if (immediate_zero)
1856         emit_insn (gen_neon_vclt<mode> (mask, operands[4], operands[5]));
1857       else
1858         emit_insn (gen_neon_vcgtu<mode> (mask, operands[5], operands[4]));
1859       break;
1860
1861     case NE:
1862       emit_insn (gen_neon_vceq<mode> (mask, operands[4], operands[5]));
1863       inverse = 1;
1864       break;
1865
1866     default:
1867       gcc_unreachable ();
1868     }
1869
1870   if (inverse)
1871     emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[2],
1872                                     operands[1]));
1873   else
1874     emit_insn (gen_neon_vbsl<mode> (operands[0], mask, operands[1],
1875                                     operands[2]));
1876
1877   DONE;
1878 })
1879
1880 ;; Patterns for builtins.
1881
1882 ; good for plain vadd, vaddq.
1883
1884 (define_expand "neon_vadd<mode>"
1885   [(match_operand:VCVTF 0 "s_register_operand")
1886    (match_operand:VCVTF 1 "s_register_operand")
1887    (match_operand:VCVTF 2 "s_register_operand")]
1888   "TARGET_NEON"
1889 {
1890   if (!<Is_float_mode> || flag_unsafe_math_optimizations)
1891     emit_insn (gen_add<mode>3 (operands[0], operands[1], operands[2]));
1892   else
1893     emit_insn (gen_neon_vadd<mode>_unspec (operands[0], operands[1],
1894                                            operands[2]));
1895   DONE;
1896 })
1897
1898 (define_expand "neon_vadd<mode>"
1899   [(match_operand:VH 0 "s_register_operand")
1900    (match_operand:VH 1 "s_register_operand")
1901    (match_operand:VH 2 "s_register_operand")]
1902   "TARGET_NEON_FP16INST"
1903 {
1904   emit_insn (gen_add<mode>3_fp16 (operands[0], operands[1], operands[2]));
1905   DONE;
1906 })
1907
1908 (define_expand "neon_vsub<mode>"
1909   [(match_operand:VH 0 "s_register_operand")
1910    (match_operand:VH 1 "s_register_operand")
1911    (match_operand:VH 2 "s_register_operand")]
1912   "TARGET_NEON_FP16INST"
1913 {
1914   emit_insn (gen_sub<mode>3_fp16 (operands[0], operands[1], operands[2]));
1915   DONE;
1916 })
1917
1918 ; Note that NEON operations don't support the full IEEE 754 standard: in
1919 ; particular, denormal values are flushed to zero.  This means that GCC cannot
1920 ; use those instructions for autovectorization, etc. unless
1921 ; -funsafe-math-optimizations is in effect (in which case flush-to-zero
1922 ; behavior is permissible).  Intrinsic operations (provided by the arm_neon.h
1923 ; header) must work in either case: if -funsafe-math-optimizations is given,
1924 ; intrinsics expand to "canonical" RTL where possible, otherwise intrinsics
1925 ; expand to unspecs (which may potentially limit the extent to which they might
1926 ; be optimized by generic code).
1927
1928 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
1929
1930 (define_insn "neon_vadd<mode>_unspec"
1931   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
1932         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
1933                       (match_operand:VCVTF 2 "s_register_operand" "w")]
1934                      UNSPEC_VADD))]
1935   "TARGET_NEON"
1936   "vadd.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1937   [(set (attr "type")
1938       (if_then_else (match_test "<Is_float_mode>")
1939                     (const_string "neon_fp_addsub_s<q>")
1940                     (const_string "neon_add<q>")))]
1941 )
1942
1943 (define_insn "neon_vaddl<sup><mode>"
1944   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1945         (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
1946                            (match_operand:VDI 2 "s_register_operand" "w")]
1947                           VADDL))]
1948   "TARGET_NEON"
1949   "vaddl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
1950   [(set_attr "type" "neon_add_long")]
1951 )
1952
1953 (define_insn "neon_vaddw<sup><mode>"
1954   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
1955         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
1956                            (match_operand:VDI 2 "s_register_operand" "w")]
1957                           VADDW))]
1958   "TARGET_NEON"
1959   "vaddw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
1960   [(set_attr "type" "neon_add_widen")]
1961 )
1962
1963 ; vhadd and vrhadd.
1964
1965 (define_insn "neon_v<r>hadd<sup><mode>"
1966   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
1967         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
1968                        (match_operand:VDQIW 2 "s_register_operand" "w")]
1969                       VHADD))]
1970   "TARGET_NEON"
1971   "v<r>hadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1972   [(set_attr "type" "neon_add_halve_q")]
1973 )
1974
1975 (define_insn "neon_vqadd<sup><mode>"
1976   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
1977         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
1978                        (match_operand:VDQIX 2 "s_register_operand" "w")]
1979                      VQADD))]
1980   "TARGET_NEON"
1981   "vqadd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
1982   [(set_attr "type" "neon_qadd<q>")]
1983 )
1984
1985 (define_insn "neon_v<r>addhn<mode>"
1986   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
1987         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
1988                             (match_operand:VN 2 "s_register_operand" "w")]
1989                            VADDHN))]
1990   "TARGET_NEON"
1991   "v<r>addhn.<V_if_elem>\t%P0, %q1, %q2"
1992   [(set_attr "type" "neon_add_halve_narrow_q")]
1993 )
1994
1995 ;; Polynomial and Float multiplication.
1996 (define_insn "neon_vmul<pf><mode>"
1997   [(set (match_operand:VPF 0 "s_register_operand" "=w")
1998         (unspec:VPF [(match_operand:VPF 1 "s_register_operand" "w")
1999                       (match_operand:VPF 2 "s_register_operand" "w")]
2000                      UNSPEC_VMUL))]
2001   "TARGET_NEON"
2002   "vmul.<pf>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2003   [(set (attr "type")
2004       (if_then_else (match_test "<Is_float_mode>")
2005                     (const_string "neon_fp_mul_s<q>")
2006                     (const_string "neon_mul_<V_elem_ch><q>")))]
2007 )
2008
2009 (define_insn "mul<mode>3"
2010  [(set
2011    (match_operand:VH 0 "s_register_operand" "=w")
2012    (mult:VH
2013     (match_operand:VH 1 "s_register_operand" "w")
2014     (match_operand:VH 2 "s_register_operand" "w")))]
2015   "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2016   "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2017  [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
2018 )
2019
2020 (define_insn "neon_vmulf<mode>"
2021  [(set
2022    (match_operand:VH 0 "s_register_operand" "=w")
2023    (mult:VH
2024     (match_operand:VH 1 "s_register_operand" "w")
2025     (match_operand:VH 2 "s_register_operand" "w")))]
2026   "TARGET_NEON_FP16INST"
2027   "vmul.f16\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2028  [(set_attr "type" "neon_mul_<VH_elem_ch><q>")]
2029 )
2030
2031 (define_expand "neon_vmla<mode>"
2032   [(match_operand:VDQW 0 "s_register_operand")
2033    (match_operand:VDQW 1 "s_register_operand")
2034    (match_operand:VDQW 2 "s_register_operand")
2035    (match_operand:VDQW 3 "s_register_operand")]
2036   "TARGET_NEON"
2037 {
2038   if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2039     emit_insn (gen_mul<mode>3add<mode>_neon (operands[0], operands[1],
2040                                              operands[2], operands[3]));
2041   else
2042     emit_insn (gen_neon_vmla<mode>_unspec (operands[0], operands[1],
2043                                            operands[2], operands[3]));
2044   DONE;
2045 })
2046
2047 (define_expand "neon_vfma<VCVTF:mode>"
2048   [(match_operand:VCVTF 0 "s_register_operand")
2049    (match_operand:VCVTF 1 "s_register_operand")
2050    (match_operand:VCVTF 2 "s_register_operand")
2051    (match_operand:VCVTF 3 "s_register_operand")]
2052   "TARGET_NEON && TARGET_FMA"
2053 {
2054   emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2055                                        operands[1]));
2056   DONE;
2057 })
2058
2059 (define_expand "neon_vfma<VH:mode>"
2060   [(match_operand:VH 0 "s_register_operand")
2061    (match_operand:VH 1 "s_register_operand")
2062    (match_operand:VH 2 "s_register_operand")
2063    (match_operand:VH 3 "s_register_operand")]
2064   "TARGET_NEON_FP16INST"
2065 {
2066   emit_insn (gen_fma<mode>4_intrinsic (operands[0], operands[2], operands[3],
2067                                        operands[1]));
2068   DONE;
2069 })
2070
2071 (define_expand "neon_vfms<VCVTF:mode>"
2072   [(match_operand:VCVTF 0 "s_register_operand")
2073    (match_operand:VCVTF 1 "s_register_operand")
2074    (match_operand:VCVTF 2 "s_register_operand")
2075    (match_operand:VCVTF 3 "s_register_operand")]
2076   "TARGET_NEON && TARGET_FMA"
2077 {
2078   emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2079                                          operands[1]));
2080   DONE;
2081 })
2082
2083 (define_expand "neon_vfms<VH:mode>"
2084   [(match_operand:VH 0 "s_register_operand")
2085    (match_operand:VH 1 "s_register_operand")
2086    (match_operand:VH 2 "s_register_operand")
2087    (match_operand:VH 3 "s_register_operand")]
2088   "TARGET_NEON_FP16INST"
2089 {
2090   emit_insn (gen_fmsub<mode>4_intrinsic (operands[0], operands[2], operands[3],
2091                                          operands[1]));
2092   DONE;
2093 })
2094
2095 ;; The expand RTL structure here is not important.
2096 ;; We use the gen_* functions anyway.
2097 ;; We just need something to wrap the iterators around.
2098
2099 (define_expand "neon_vfm<vfml_op>l_<vfml_half><mode>"
2100   [(set (match_operand:VCVTF 0 "s_register_operand")
2101      (unspec:VCVTF
2102         [(match_operand:VCVTF 1 "s_register_operand")
2103            (PLUSMINUS:<VFML>
2104              (match_operand:<VFML> 2 "s_register_operand")
2105              (match_operand:<VFML> 3 "s_register_operand"))] VFMLHALVES))]
2106   "TARGET_FP16FML"
2107 {
2108   rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2109   emit_insn (gen_vfm<vfml_op>l_<vfml_half><mode>_intrinsic (operands[0],
2110                                                              operands[1],
2111                                                              operands[2],
2112                                                              operands[3],
2113                                                              half, half));
2114   DONE;
2115 })
2116
2117 (define_insn "vfmal_low<mode>_intrinsic"
2118  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2119         (fma:VCVTF
2120          (float_extend:VCVTF
2121           (vec_select:<VFMLSEL>
2122            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2123            (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2124          (float_extend:VCVTF
2125           (vec_select:<VFMLSEL>
2126            (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2127            (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2128          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2129  "TARGET_FP16FML"
2130  "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2131  [(set_attr "type" "neon_fp_mla_s<q>")]
2132 )
2133
2134 (define_insn "vfmsl_high<mode>_intrinsic"
2135  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2136         (fma:VCVTF
2137          (float_extend:VCVTF
2138           (neg:<VFMLSEL>
2139             (vec_select:<VFMLSEL>
2140               (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2141               (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2142          (float_extend:VCVTF
2143           (vec_select:<VFMLSEL>
2144            (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2145            (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2146          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2147  "TARGET_FP16FML"
2148  "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2149  [(set_attr "type" "neon_fp_mla_s<q>")]
2150 )
2151
2152 (define_insn "vfmal_high<mode>_intrinsic"
2153  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2154         (fma:VCVTF
2155          (float_extend:VCVTF
2156           (vec_select:<VFMLSEL>
2157            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2158            (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2159          (float_extend:VCVTF
2160           (vec_select:<VFMLSEL>
2161            (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2162            (match_operand:<VFML> 5 "vect_par_constant_high" "")))
2163          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2164  "TARGET_FP16FML"
2165  "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3"
2166  [(set_attr "type" "neon_fp_mla_s<q>")]
2167 )
2168
2169 (define_insn "vfmsl_low<mode>_intrinsic"
2170  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2171         (fma:VCVTF
2172          (float_extend:VCVTF
2173           (neg:<VFMLSEL>
2174             (vec_select:<VFMLSEL>
2175               (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2176               (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2177          (float_extend:VCVTF
2178           (vec_select:<VFMLSEL>
2179            (match_operand:<VFML> 3 "s_register_operand" "<VF_constraint>")
2180            (match_operand:<VFML> 5 "vect_par_constant_low" "")))
2181          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2182  "TARGET_FP16FML"
2183  "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3"
2184  [(set_attr "type" "neon_fp_mla_s<q>")]
2185 )
2186
2187 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><VCVTF:mode>"
2188   [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2189      (unspec:VCVTF
2190         [(match_operand:VCVTF 1 "s_register_operand")
2191          (PLUSMINUS:<VFML>
2192            (match_operand:<VFML> 2 "s_register_operand")
2193            (match_operand:<VFML> 3 "s_register_operand"))
2194          (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2195   "TARGET_FP16FML"
2196 {
2197   rtx lane = GEN_INT (NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[4])));
2198   rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2199   emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><mode>_intrinsic
2200                                                (operands[0], operands[1],
2201                                                 operands[2], operands[3],
2202                                                 half, lane));
2203   DONE;
2204 })
2205
2206 (define_insn "vfmal_lane_low<mode>_intrinsic"
2207  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2208         (fma:VCVTF
2209          (float_extend:VCVTF
2210           (vec_select:<VFMLSEL>
2211            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2212            (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2213          (float_extend:VCVTF
2214            (vec_duplicate:<VFMLSEL>
2215              (vec_select:HF
2216                (match_operand:<VFML> 3 "s_register_operand" "x")
2217                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2218          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2219  "TARGET_FP16FML"
2220  {
2221     int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2222     if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2223       {
2224         operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2225         return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2226       }
2227     else
2228       {
2229         operands[5] = GEN_INT (lane);
2230         return "vfmal.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2231       }
2232   }
2233  [(set_attr "type" "neon_fp_mla_s<q>")]
2234 )
2235
2236 (define_expand "neon_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>"
2237   [(set:VCVTF (match_operand:VCVTF 0 "s_register_operand")
2238      (unspec:VCVTF
2239         [(match_operand:VCVTF 1 "s_register_operand")
2240          (PLUSMINUS:<VFML>
2241            (match_operand:<VFML> 2 "s_register_operand")
2242            (match_operand:<VFMLSEL2> 3 "s_register_operand"))
2243          (match_operand:SI 4 "const_int_operand")] VFMLHALVES))]
2244   "TARGET_FP16FML"
2245 {
2246   rtx lane
2247     = GEN_INT (NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[4])));
2248   rtx half = arm_simd_vect_par_cnst_half (<VFML>mode, <vfml_half_selector>);
2249   emit_insn (gen_vfm<vfml_op>l_lane_<vfml_half><vfmlsel2><mode>_intrinsic
2250                 (operands[0], operands[1], operands[2], operands[3],
2251                  half, lane));
2252   DONE;
2253 })
2254
2255 ;; Used to implement the intrinsics:
2256 ;; float32x4_t vfmlalq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2257 ;; float32x2_t vfmlal_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2258 ;; Needs a bit of care to get the modes of the different sub-expressions right
2259 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2260 ;; S or D subregister to select the appropriate lane from.
2261
2262 (define_insn "vfmal_lane_low<vfmlsel2><mode>_intrinsic"
2263  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2264         (fma:VCVTF
2265          (float_extend:VCVTF
2266           (vec_select:<VFMLSEL>
2267            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2268            (match_operand:<VFML> 4 "vect_par_constant_low" "")))
2269          (float_extend:VCVTF
2270            (vec_duplicate:<VFMLSEL>
2271              (vec_select:HF
2272                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2273                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2274          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2275  "TARGET_FP16FML"
2276  {
2277    int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2278    int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2279    int new_lane = lane % elts_per_reg;
2280    int regdiff = lane / elts_per_reg;
2281    operands[5] = GEN_INT (new_lane);
2282    /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2283       because we want the print_operand code to print the appropriate
2284       S or D register prefix.  */
2285    operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2286    operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2287    return "vfmal.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2288  }
2289  [(set_attr "type" "neon_fp_mla_s<q>")]
2290 )
2291
2292 ;; Used to implement the intrinsics:
2293 ;; float32x4_t vfmlalq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2294 ;; float32x2_t vfmlal_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2295 ;; Needs a bit of care to get the modes of the different sub-expressions right
2296 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2297 ;; S or D subregister to select the appropriate lane from.
2298
2299 (define_insn "vfmal_lane_high<vfmlsel2><mode>_intrinsic"
2300  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2301         (fma:VCVTF
2302          (float_extend:VCVTF
2303           (vec_select:<VFMLSEL>
2304            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2305            (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2306          (float_extend:VCVTF
2307            (vec_duplicate:<VFMLSEL>
2308              (vec_select:HF
2309                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2310                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2311          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2312  "TARGET_FP16FML"
2313  {
2314    int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2315    int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2316    int new_lane = lane % elts_per_reg;
2317    int regdiff = lane / elts_per_reg;
2318    operands[5] = GEN_INT (new_lane);
2319    /* We re-create operands[3] in the halved VFMLSEL mode
2320       because we've calculated the correct half-width subreg to extract
2321       the lane from and we want to print *that* subreg instead.  */
2322    operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2323    return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2324  }
2325  [(set_attr "type" "neon_fp_mla_s<q>")]
2326 )
2327
2328 (define_insn "vfmal_lane_high<mode>_intrinsic"
2329  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2330         (fma:VCVTF
2331          (float_extend:VCVTF
2332           (vec_select:<VFMLSEL>
2333            (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2334            (match_operand:<VFML> 4 "vect_par_constant_high" "")))
2335          (float_extend:VCVTF
2336            (vec_duplicate:<VFMLSEL>
2337              (vec_select:HF
2338                (match_operand:<VFML> 3 "s_register_operand" "x")
2339                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2340          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2341  "TARGET_FP16FML"
2342   {
2343     int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2344     if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2345       {
2346         operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2347         return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2348       }
2349     else
2350       {
2351         operands[5] = GEN_INT (lane);
2352         return "vfmal.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2353       }
2354   }
2355  [(set_attr "type" "neon_fp_mla_s<q>")]
2356 )
2357
2358 (define_insn "vfmsl_lane_low<mode>_intrinsic"
2359  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2360         (fma:VCVTF
2361          (float_extend:VCVTF
2362           (neg:<VFMLSEL>
2363             (vec_select:<VFMLSEL>
2364               (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2365               (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2366          (float_extend:VCVTF
2367            (vec_duplicate:<VFMLSEL>
2368              (vec_select:HF
2369                (match_operand:<VFML> 3 "s_register_operand" "x")
2370                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2371          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2372  "TARGET_FP16FML"
2373  {
2374     int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2375     if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2376       {
2377         operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2378         return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_hi>3[%c5]";
2379       }
2380     else
2381       {
2382         operands[5] = GEN_INT (lane);
2383         return "vfmsl.f16\\t%<V_reg>0, %<V_lo>2, %<V_lo>3[%c5]";
2384       }
2385   }
2386  [(set_attr "type" "neon_fp_mla_s<q>")]
2387 )
2388
2389 ;; Used to implement the intrinsics:
2390 ;; float32x4_t vfmlslq_lane_low_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2391 ;; float32x2_t vfmlsl_laneq_low_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2392 ;; Needs a bit of care to get the modes of the different sub-expressions right
2393 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2394 ;; S or D subregister to select the appropriate lane from.
2395
2396 (define_insn "vfmsl_lane_low<vfmlsel2><mode>_intrinsic"
2397  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2398         (fma:VCVTF
2399          (float_extend:VCVTF
2400           (neg:<VFMLSEL>
2401             (vec_select:<VFMLSEL>
2402               (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2403               (match_operand:<VFML> 4 "vect_par_constant_low" ""))))
2404          (float_extend:VCVTF
2405            (vec_duplicate:<VFMLSEL>
2406              (vec_select:HF
2407                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2408                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2409          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2410  "TARGET_FP16FML"
2411  {
2412    int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2413    int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2414    int new_lane = lane % elts_per_reg;
2415    int regdiff = lane / elts_per_reg;
2416    operands[5] = GEN_INT (new_lane);
2417    /* We re-create operands[2] and operands[3] in the halved VFMLSEL modes
2418       because we want the print_operand code to print the appropriate
2419       S or D register prefix.  */
2420    operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2421    operands[2] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[2]));
2422    return "vfmsl.f16\\t%<V_reg>0, %<V_lane_reg>2, %<V_lane_reg>3[%c5]";
2423  }
2424  [(set_attr "type" "neon_fp_mla_s<q>")]
2425 )
2426
2427 ;; Used to implement the intrinsics:
2428 ;; float32x4_t vfmlslq_lane_high_f16 (float32x4_t r, float16x8_t a, float16x4_t b, const int lane)
2429 ;; float32x2_t vfmlsl_laneq_high_f16 (float32x2_t r, float16x4_t a, float16x8_t b, const int lane)
2430 ;; Needs a bit of care to get the modes of the different sub-expressions right
2431 ;; due to 'a' and 'b' having different sizes and make sure we use the right
2432 ;; S or D subregister to select the appropriate lane from.
2433
2434 (define_insn "vfmsl_lane_high<vfmlsel2><mode>_intrinsic"
2435  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2436         (fma:VCVTF
2437          (float_extend:VCVTF
2438           (neg:<VFMLSEL>
2439             (vec_select:<VFMLSEL>
2440              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2441              (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2442          (float_extend:VCVTF
2443            (vec_duplicate:<VFMLSEL>
2444              (vec_select:HF
2445                (match_operand:<VFMLSEL2> 3 "s_register_operand" "x")
2446                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2447          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2448  "TARGET_FP16FML"
2449  {
2450    int lane = NEON_ENDIAN_LANE_N (<VFMLSEL2>mode, INTVAL (operands[5]));
2451    int elts_per_reg = GET_MODE_NUNITS (<VFMLSEL>mode);
2452    int new_lane = lane % elts_per_reg;
2453    int regdiff = lane / elts_per_reg;
2454    operands[5] = GEN_INT (new_lane);
2455    /* We re-create operands[3] in the halved VFMLSEL mode
2456       because we've calculated the correct half-width subreg to extract
2457       the lane from and we want to print *that* subreg instead.  */
2458    operands[3] = gen_rtx_REG (<VFMLSEL>mode, REGNO (operands[3]) + regdiff);
2459    return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lane_reg>3[%c5]";
2460  }
2461  [(set_attr "type" "neon_fp_mla_s<q>")]
2462 )
2463
2464 (define_insn "vfmsl_lane_high<mode>_intrinsic"
2465  [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2466         (fma:VCVTF
2467          (float_extend:VCVTF
2468           (neg:<VFMLSEL>
2469             (vec_select:<VFMLSEL>
2470              (match_operand:<VFML> 2 "s_register_operand" "<VF_constraint>")
2471              (match_operand:<VFML> 4 "vect_par_constant_high" ""))))
2472          (float_extend:VCVTF
2473            (vec_duplicate:<VFMLSEL>
2474              (vec_select:HF
2475                (match_operand:<VFML> 3 "s_register_operand" "x")
2476                (parallel [(match_operand:SI 5 "const_int_operand" "n")]))))
2477          (match_operand:VCVTF 1 "s_register_operand" "0")))]
2478  "TARGET_FP16FML"
2479   {
2480     int lane = NEON_ENDIAN_LANE_N (<VFML>mode, INTVAL (operands[5]));
2481     if (lane > GET_MODE_NUNITS (<VFMLSEL>mode) - 1)
2482       {
2483         operands[5] = GEN_INT (lane - GET_MODE_NUNITS (<VFMLSEL>mode));
2484         return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_hi>3[%c5]";
2485       }
2486     else
2487       {
2488         operands[5] = GEN_INT (lane);
2489         return "vfmsl.f16\\t%<V_reg>0, %<V_hi>2, %<V_lo>3[%c5]";
2490       }
2491   }
2492  [(set_attr "type" "neon_fp_mla_s<q>")]
2493 )
2494
2495 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2496
2497 (define_insn "neon_vmla<mode>_unspec"
2498   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2499         (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2500                       (match_operand:VDQW 2 "s_register_operand" "w")
2501                       (match_operand:VDQW 3 "s_register_operand" "w")]
2502                     UNSPEC_VMLA))]
2503   "TARGET_NEON"
2504   "vmla.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2505   [(set (attr "type")
2506       (if_then_else (match_test "<Is_float_mode>")
2507                     (const_string "neon_fp_mla_s<q>")
2508                     (const_string "neon_mla_<V_elem_ch><q>")))]
2509 )
2510
2511 (define_insn "neon_vmlal<sup><mode>"
2512   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2513         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2514                            (match_operand:VW 2 "s_register_operand" "w")
2515                            (match_operand:VW 3 "s_register_operand" "w")]
2516                           VMLAL))]
2517   "TARGET_NEON"
2518   "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2519   [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2520 )
2521
2522 (define_expand "neon_vmls<mode>"
2523   [(match_operand:VDQW 0 "s_register_operand")
2524    (match_operand:VDQW 1 "s_register_operand")
2525    (match_operand:VDQW 2 "s_register_operand")
2526    (match_operand:VDQW 3 "s_register_operand")]
2527   "TARGET_NEON"
2528 {
2529   if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2530     emit_insn (gen_mul<mode>3neg<mode>add<mode>_neon (operands[0],
2531                  operands[1], operands[2], operands[3]));
2532   else
2533     emit_insn (gen_neon_vmls<mode>_unspec (operands[0], operands[1],
2534                                            operands[2], operands[3]));
2535   DONE;
2536 })
2537
2538 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2539
2540 (define_insn "neon_vmls<mode>_unspec"
2541   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
2542         (unspec:VDQW [(match_operand:VDQW 1 "s_register_operand" "0")
2543                       (match_operand:VDQW 2 "s_register_operand" "w")
2544                       (match_operand:VDQW 3 "s_register_operand" "w")]
2545                     UNSPEC_VMLS))]
2546   "TARGET_NEON"
2547   "vmls.<V_if_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2548   [(set (attr "type")
2549       (if_then_else (match_test "<Is_float_mode>")
2550                     (const_string "neon_fp_mla_s<q>")
2551                     (const_string "neon_mla_<V_elem_ch><q>")))]
2552 )
2553
2554 (define_insn "neon_vmlsl<sup><mode>"
2555   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2556         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2557                            (match_operand:VW 2 "s_register_operand" "w")
2558                            (match_operand:VW 3 "s_register_operand" "w")]
2559                           VMLSL))]
2560   "TARGET_NEON"
2561   "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
2562   [(set_attr "type" "neon_mla_<V_elem_ch>_long")]
2563 )
2564
2565 ;; vqdmulh, vqrdmulh
2566 (define_insn "neon_vq<r>dmulh<mode>"
2567   [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2568         (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "w")
2569                        (match_operand:VMDQI 2 "s_register_operand" "w")]
2570                       VQDMULH))]
2571   "TARGET_NEON"
2572   "vq<r>dmulh.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2573   [(set_attr "type" "neon_sat_mul_<V_elem_ch><q>")]
2574 )
2575
2576 ;; vqrdmlah, vqrdmlsh
2577 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h<mode>"
2578   [(set (match_operand:VMDQI 0 "s_register_operand" "=w")
2579         (unspec:VMDQI [(match_operand:VMDQI 1 "s_register_operand" "0")
2580                        (match_operand:VMDQI 2 "s_register_operand" "w")
2581                        (match_operand:VMDQI 3 "s_register_operand" "w")]
2582                       VQRDMLH_AS))]
2583   "TARGET_NEON_RDMA"
2584   "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
2585   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2586 )
2587
2588 (define_insn "neon_vqdmlal<mode>"
2589   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2590         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2591                            (match_operand:VMDI 2 "s_register_operand" "w")
2592                            (match_operand:VMDI 3 "s_register_operand" "w")]
2593                           UNSPEC_VQDMLAL))]
2594   "TARGET_NEON"
2595   "vqdmlal.<V_s_elem>\t%q0, %P2, %P3"
2596   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2597 )
2598
2599 (define_insn "neon_vqdmlsl<mode>"
2600   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2601         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
2602                            (match_operand:VMDI 2 "s_register_operand" "w")
2603                            (match_operand:VMDI 3 "s_register_operand" "w")]
2604                           UNSPEC_VQDMLSL))]
2605   "TARGET_NEON"
2606   "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3"
2607   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_long")]
2608 )
2609
2610 (define_insn "neon_vmull<sup><mode>"
2611   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2612         (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
2613                            (match_operand:VW 2 "s_register_operand" "w")]
2614                           VMULL))]
2615   "TARGET_NEON"
2616   "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2617   [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
2618 )
2619
2620 (define_insn "neon_vqdmull<mode>"
2621   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2622         (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
2623                            (match_operand:VMDI 2 "s_register_operand" "w")]
2624                           UNSPEC_VQDMULL))]
2625   "TARGET_NEON"
2626   "vqdmull.<V_s_elem>\t%q0, %P1, %P2"
2627   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_long")]
2628 )
2629
2630 (define_expand "neon_vsub<mode>"
2631   [(match_operand:VCVTF 0 "s_register_operand")
2632    (match_operand:VCVTF 1 "s_register_operand")
2633    (match_operand:VCVTF 2 "s_register_operand")]
2634   "TARGET_NEON"
2635 {
2636   if (!<Is_float_mode> || flag_unsafe_math_optimizations)
2637     emit_insn (gen_sub<mode>3 (operands[0], operands[1], operands[2]));
2638   else
2639     emit_insn (gen_neon_vsub<mode>_unspec (operands[0], operands[1],
2640                                            operands[2]));
2641   DONE;
2642 })
2643
2644 ; Used for intrinsics when flag_unsafe_math_optimizations is false.
2645
2646 (define_insn "neon_vsub<mode>_unspec"
2647   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2648         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2649                       (match_operand:VCVTF 2 "s_register_operand" "w")]
2650                      UNSPEC_VSUB))]
2651   "TARGET_NEON"
2652   "vsub.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2653   [(set (attr "type")
2654       (if_then_else (match_test "<Is_float_mode>")
2655                     (const_string "neon_fp_addsub_s<q>")
2656                     (const_string "neon_sub<q>")))]
2657 )
2658
2659 (define_insn "neon_vsubl<sup><mode>"
2660   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2661         (unspec:<V_widen> [(match_operand:VDI 1 "s_register_operand" "w")
2662                            (match_operand:VDI 2 "s_register_operand" "w")]
2663                           VSUBL))]
2664   "TARGET_NEON"
2665   "vsubl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
2666   [(set_attr "type" "neon_sub_long")]
2667 )
2668
2669 (define_insn "neon_vsubw<sup><mode>"
2670   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
2671         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "w")
2672                            (match_operand:VDI 2 "s_register_operand" "w")]
2673                           VSUBW))]
2674   "TARGET_NEON"
2675   "vsubw.<sup>%#<V_sz_elem>\t%q0, %q1, %P2"
2676   [(set_attr "type" "neon_sub_widen")]
2677 )
2678
2679 (define_insn "neon_vqsub<sup><mode>"
2680   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
2681         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
2682                        (match_operand:VDQIX 2 "s_register_operand" "w")]
2683                       VQSUB))]
2684   "TARGET_NEON"
2685   "vqsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2686   [(set_attr "type" "neon_qsub<q>")]
2687 )
2688
2689 (define_insn "neon_vhsub<sup><mode>"
2690   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2691         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2692                        (match_operand:VDQIW 2 "s_register_operand" "w")]
2693                       VHSUB))]
2694   "TARGET_NEON"
2695   "vhsub.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2696   [(set_attr "type" "neon_sub_halve<q>")]
2697 )
2698
2699 (define_insn "neon_v<r>subhn<mode>"
2700   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
2701         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
2702                             (match_operand:VN 2 "s_register_operand" "w")]
2703                            VSUBHN))]
2704   "TARGET_NEON"
2705   "v<r>subhn.<V_if_elem>\t%P0, %q1, %q2"
2706   [(set_attr "type" "neon_sub_halve_narrow_q")]
2707 )
2708
2709 ;; These may expand to an UNSPEC pattern when a floating point mode is used
2710 ;; without unsafe math optimizations.
2711 (define_expand "neon_vc<cmp_op><mode>"
2712   [(match_operand:<V_cmp_result> 0 "s_register_operand")
2713      (neg:<V_cmp_result>
2714        (COMPARISONS:VDQW (match_operand:VDQW 1 "s_register_operand")
2715                          (match_operand:VDQW 2 "reg_or_zero_operand")))]
2716   "TARGET_NEON"
2717   {
2718     /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2719        are enabled.  */
2720     if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2721         && !flag_unsafe_math_optimizations)
2722       {
2723         /* We don't just emit a gen_neon_vc<cmp_op><mode>_insn_unspec because
2724            we define gen_neon_vceq<mode>_insn_unspec only for float modes
2725            whereas this expander iterates over the integer modes as well,
2726            but we will never expand to UNSPECs for the integer comparisons.  */
2727         switch (<MODE>mode)
2728           {
2729             case E_V2SFmode:
2730               emit_insn (gen_neon_vc<cmp_op>v2sf_insn_unspec (operands[0],
2731                                                               operands[1],
2732                                                               operands[2]));
2733               break;
2734             case E_V4SFmode:
2735               emit_insn (gen_neon_vc<cmp_op>v4sf_insn_unspec (operands[0],
2736                                                               operands[1],
2737                                                               operands[2]));
2738               break;
2739             default:
2740               gcc_unreachable ();
2741           }
2742       }
2743     else
2744       emit_insn (gen_neon_vc<cmp_op><mode>_insn (operands[0],
2745                                                  operands[1],
2746                                                  operands[2]));
2747     DONE;
2748   }
2749 )
2750
2751 (define_insn "neon_vc<cmp_op><mode>_insn"
2752   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2753         (neg:<V_cmp_result>
2754           (COMPARISONS:<V_cmp_result>
2755             (match_operand:VDQW 1 "s_register_operand" "w,w")
2756             (match_operand:VDQW 2 "reg_or_zero_operand" "w,Dz"))))]
2757   "TARGET_NEON && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2758                     && !flag_unsafe_math_optimizations)"
2759   {
2760     char pattern[100];
2761     sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2762                       " %%<V_reg>1, %s",
2763                        GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2764                          ? "f" : "<cmp_type>",
2765                        which_alternative == 0
2766                          ? "%<V_reg>2" : "#0");
2767     output_asm_insn (pattern, operands);
2768     return "";
2769   }
2770   [(set (attr "type")
2771         (if_then_else (match_operand 2 "zero_operand")
2772                       (const_string "neon_compare_zero<q>")
2773                       (const_string "neon_compare<q>")))]
2774 )
2775
2776 (define_insn "neon_vc<cmp_op_unsp><mode>_insn_unspec"
2777   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2778         (unspec:<V_cmp_result>
2779           [(match_operand:VCVTF 1 "s_register_operand" "w,w")
2780            (match_operand:VCVTF 2 "reg_or_zero_operand" "w,Dz")]
2781           NEON_VCMP))]
2782   "TARGET_NEON"
2783   {
2784     char pattern[100];
2785     sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2786                        " %%<V_reg>1, %s",
2787                        which_alternative == 0
2788                          ? "%<V_reg>2" : "#0");
2789     output_asm_insn (pattern, operands);
2790     return "";
2791 }
2792   [(set_attr "type" "neon_fp_compare_s<q>")]
2793 )
2794
2795 (define_expand "neon_vc<cmp_op><mode>"
2796  [(match_operand:<V_cmp_result> 0 "s_register_operand")
2797   (neg:<V_cmp_result>
2798    (COMPARISONS:VH
2799     (match_operand:VH 1 "s_register_operand")
2800     (match_operand:VH 2 "reg_or_zero_operand")))]
2801  "TARGET_NEON_FP16INST"
2802 {
2803   /* For FP comparisons use UNSPECS unless -funsafe-math-optimizations
2804      are enabled.  */
2805   if (GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2806       && !flag_unsafe_math_optimizations)
2807     emit_insn
2808       (gen_neon_vc<cmp_op><mode>_fp16insn_unspec
2809        (operands[0], operands[1], operands[2]));
2810   else
2811     emit_insn
2812       (gen_neon_vc<cmp_op><mode>_fp16insn
2813        (operands[0], operands[1], operands[2]));
2814   DONE;
2815 })
2816
2817 (define_insn "neon_vc<cmp_op><mode>_fp16insn"
2818  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2819    (neg:<V_cmp_result>
2820     (COMPARISONS:<V_cmp_result>
2821      (match_operand:VH 1 "s_register_operand" "w,w")
2822      (match_operand:VH 2 "reg_or_zero_operand" "w,Dz"))))]
2823  "TARGET_NEON_FP16INST
2824   && !(GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2825   && !flag_unsafe_math_optimizations)"
2826 {
2827   char pattern[100];
2828   sprintf (pattern, "vc<cmp_op>.%s%%#<V_sz_elem>\t%%<V_reg>0,"
2829            " %%<V_reg>1, %s",
2830            GET_MODE_CLASS (<MODE>mode) == MODE_VECTOR_FLOAT
2831            ? "f" : "<cmp_type>",
2832            which_alternative == 0
2833            ? "%<V_reg>2" : "#0");
2834   output_asm_insn (pattern, operands);
2835   return "";
2836 }
2837  [(set (attr "type")
2838    (if_then_else (match_operand 2 "zero_operand")
2839     (const_string "neon_compare_zero<q>")
2840     (const_string "neon_compare<q>")))])
2841
2842 (define_insn "neon_vc<cmp_op_unsp><mode>_fp16insn_unspec"
2843  [(set
2844    (match_operand:<V_cmp_result> 0 "s_register_operand" "=w,w")
2845    (unspec:<V_cmp_result>
2846     [(match_operand:VH 1 "s_register_operand" "w,w")
2847      (match_operand:VH 2 "reg_or_zero_operand" "w,Dz")]
2848     NEON_VCMP))]
2849  "TARGET_NEON_FP16INST"
2850 {
2851   char pattern[100];
2852   sprintf (pattern, "vc<cmp_op_unsp>.f%%#<V_sz_elem>\t%%<V_reg>0,"
2853            " %%<V_reg>1, %s",
2854            which_alternative == 0
2855            ? "%<V_reg>2" : "#0");
2856   output_asm_insn (pattern, operands);
2857   return "";
2858 }
2859  [(set_attr "type" "neon_fp_compare_s<q>")])
2860
2861 (define_insn "neon_vc<cmp_op>u<mode>"
2862   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2863         (neg:<V_cmp_result>
2864           (GTUGEU:<V_cmp_result>
2865             (match_operand:VDQIW 1 "s_register_operand" "w")
2866             (match_operand:VDQIW 2 "s_register_operand" "w"))))]
2867   "TARGET_NEON"
2868   "vc<cmp_op>.u%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2869   [(set_attr "type" "neon_compare<q>")]
2870 )
2871
2872 (define_expand "neon_vca<cmp_op><mode>"
2873   [(set (match_operand:<V_cmp_result> 0 "s_register_operand")
2874         (neg:<V_cmp_result>
2875           (GTGE:<V_cmp_result>
2876             (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand"))
2877             (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand")))))]
2878   "TARGET_NEON"
2879   {
2880     if (flag_unsafe_math_optimizations)
2881       emit_insn (gen_neon_vca<cmp_op><mode>_insn (operands[0], operands[1],
2882                                                   operands[2]));
2883     else
2884       emit_insn (gen_neon_vca<cmp_op><mode>_insn_unspec (operands[0],
2885                                                          operands[1],
2886                                                          operands[2]));
2887     DONE;
2888   }
2889 )
2890
2891 (define_insn "neon_vca<cmp_op><mode>_insn"
2892   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2893         (neg:<V_cmp_result>
2894           (GTGE:<V_cmp_result>
2895             (abs:VCVTF (match_operand:VCVTF 1 "s_register_operand" "w"))
2896             (abs:VCVTF (match_operand:VCVTF 2 "s_register_operand" "w")))))]
2897   "TARGET_NEON && flag_unsafe_math_optimizations"
2898   "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2899   [(set_attr "type" "neon_fp_compare_s<q>")]
2900 )
2901
2902 (define_insn "neon_vca<cmp_op_unsp><mode>_insn_unspec"
2903   [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2904         (unspec:<V_cmp_result> [(match_operand:VCVTF 1 "s_register_operand" "w")
2905                                 (match_operand:VCVTF 2 "s_register_operand" "w")]
2906                                NEON_VACMP))]
2907   "TARGET_NEON"
2908   "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2909   [(set_attr "type" "neon_fp_compare_s<q>")]
2910 )
2911
2912 (define_expand "neon_vca<cmp_op><mode>"
2913   [(set
2914     (match_operand:<V_cmp_result> 0 "s_register_operand")
2915     (neg:<V_cmp_result>
2916      (GLTE:<V_cmp_result>
2917       (abs:VH (match_operand:VH 1 "s_register_operand"))
2918       (abs:VH (match_operand:VH 2 "s_register_operand")))))]
2919  "TARGET_NEON_FP16INST"
2920 {
2921   if (flag_unsafe_math_optimizations)
2922     emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn
2923                (operands[0], operands[1], operands[2]));
2924   else
2925     emit_insn (gen_neon_vca<cmp_op><mode>_fp16insn_unspec
2926                (operands[0], operands[1], operands[2]));
2927   DONE;
2928 })
2929
2930 (define_insn "neon_vca<cmp_op><mode>_fp16insn"
2931   [(set
2932     (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2933     (neg:<V_cmp_result>
2934      (GLTE:<V_cmp_result>
2935       (abs:VH (match_operand:VH 1 "s_register_operand" "w"))
2936       (abs:VH (match_operand:VH 2 "s_register_operand" "w")))))]
2937  "TARGET_NEON_FP16INST && flag_unsafe_math_optimizations"
2938  "vac<cmp_op>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2939  [(set_attr "type" "neon_fp_compare_s<q>")]
2940 )
2941
2942 (define_insn "neon_vca<cmp_op_unsp><mode>_fp16insn_unspec"
2943  [(set (match_operand:<V_cmp_result> 0 "s_register_operand" "=w")
2944    (unspec:<V_cmp_result>
2945     [(match_operand:VH 1 "s_register_operand" "w")
2946      (match_operand:VH 2 "s_register_operand" "w")]
2947     NEON_VAGLTE))]
2948  "TARGET_NEON"
2949  "vac<cmp_op_unsp>.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2950  [(set_attr "type" "neon_fp_compare_s<q>")]
2951 )
2952
2953 (define_expand "neon_vc<cmp_op>z<mode>"
2954  [(set
2955    (match_operand:<V_cmp_result> 0 "s_register_operand")
2956    (COMPARISONS:<V_cmp_result>
2957     (match_operand:VH 1 "s_register_operand")
2958     (const_int 0)))]
2959  "TARGET_NEON_FP16INST"
2960  {
2961   emit_insn (gen_neon_vc<cmp_op><mode> (operands[0], operands[1],
2962                                         CONST0_RTX (<MODE>mode)));
2963   DONE;
2964 })
2965
2966 (define_insn "neon_vtst<mode>"
2967   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2968         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2969                        (match_operand:VDQIW 2 "s_register_operand" "w")]
2970                       UNSPEC_VTST))]
2971   "TARGET_NEON"
2972   "vtst.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2973   [(set_attr "type" "neon_tst<q>")]
2974 )
2975
2976 (define_insn "neon_vabd<sup><mode>"
2977   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
2978         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
2979                       (match_operand:VDQIW 2 "s_register_operand" "w")]
2980                      VABD))]
2981   "TARGET_NEON"
2982   "vabd.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2983   [(set_attr "type" "neon_abd<q>")]
2984 )
2985
2986 (define_insn "neon_vabd<mode>"
2987   [(set (match_operand:VH 0 "s_register_operand" "=w")
2988     (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
2989                 (match_operand:VH 2 "s_register_operand" "w")]
2990      UNSPEC_VABD_F))]
2991  "TARGET_NEON_FP16INST"
2992  "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
2993   [(set_attr "type" "neon_abd<q>")]
2994 )
2995
2996 (define_insn "neon_vabdf<mode>"
2997   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
2998         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
2999                       (match_operand:VCVTF 2 "s_register_operand" "w")]
3000                      UNSPEC_VABD_F))]
3001   "TARGET_NEON"
3002   "vabd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3003   [(set_attr "type" "neon_fp_abd_s<q>")]
3004 )
3005
3006 (define_insn "neon_vabdl<sup><mode>"
3007   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3008         (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
3009                            (match_operand:VW 2 "s_register_operand" "w")]
3010                           VABDL))]
3011   "TARGET_NEON"
3012   "vabdl.<sup>%#<V_sz_elem>\t%q0, %P1, %P2"
3013   [(set_attr "type" "neon_abd_long")]
3014 )
3015
3016 (define_insn "neon_vaba<sup><mode>"
3017   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3018         (plus:VDQIW (unspec:VDQIW [(match_operand:VDQIW 2 "s_register_operand" "w")
3019                                    (match_operand:VDQIW 3 "s_register_operand" "w")]
3020                                   VABD)
3021                     (match_operand:VDQIW 1 "s_register_operand" "0")))]
3022   "TARGET_NEON"
3023   "vaba.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3024   [(set_attr "type" "neon_arith_acc<q>")]
3025 )
3026
3027 (define_insn "neon_vabal<sup><mode>"
3028   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
3029         (plus:<V_widen> (unspec:<V_widen> [(match_operand:VW 2 "s_register_operand" "w")
3030                                            (match_operand:VW 3 "s_register_operand" "w")]
3031                                            VABDL)
3032                          (match_operand:<V_widen> 1 "s_register_operand" "0")))]
3033   "TARGET_NEON"
3034   "vabal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3"
3035   [(set_attr "type" "neon_arith_acc<q>")]
3036 )
3037
3038 (define_expand "<sup>sadv16qi"
3039   [(use (match_operand:V4SI 0 "register_operand"))
3040    (unspec:V16QI [(use (match_operand:V16QI 1 "register_operand"))
3041                   (use (match_operand:V16QI 2 "register_operand"))] VABAL)
3042    (use (match_operand:V4SI 3 "register_operand"))]
3043   "TARGET_NEON"
3044   {
3045     rtx reduc = gen_reg_rtx (V8HImode);
3046     rtx op1_highpart = gen_reg_rtx (V8QImode);
3047     rtx op2_highpart = gen_reg_rtx (V8QImode);
3048
3049     emit_insn (gen_neon_vabdl<sup>v8qi (reduc,
3050                                         gen_lowpart (V8QImode, operands[1]),
3051                                         gen_lowpart (V8QImode, operands[2])));
3052
3053     emit_insn (gen_neon_vget_highv16qi (op1_highpart, operands[1]));
3054     emit_insn (gen_neon_vget_highv16qi (op2_highpart, operands[2]));
3055     emit_insn (gen_neon_vabal<sup>v8qi (reduc, reduc,
3056                                         op1_highpart, op2_highpart));
3057     emit_insn (gen_neon_vpadal<sup>v8hi (operands[3], operands[3], reduc));
3058
3059     emit_move_insn (operands[0], operands[3]);
3060     DONE;
3061   }
3062 )
3063
3064 (define_insn "neon_v<maxmin><sup><mode>"
3065   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3066         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")
3067                       (match_operand:VDQIW 2 "s_register_operand" "w")]
3068                      VMAXMIN))]
3069   "TARGET_NEON"
3070   "v<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3071   [(set_attr "type" "neon_minmax<q>")]
3072 )
3073
3074 (define_insn "neon_v<maxmin>f<mode>"
3075   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3076         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3077                       (match_operand:VCVTF 2 "s_register_operand" "w")]
3078                      VMAXMINF))]
3079   "TARGET_NEON"
3080   "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3081   [(set_attr "type" "neon_fp_minmax_s<q>")]
3082 )
3083
3084 (define_insn "neon_v<maxmin>f<mode>"
3085  [(set (match_operand:VH 0 "s_register_operand" "=w")
3086    (unspec:VH
3087     [(match_operand:VH 1 "s_register_operand" "w")
3088      (match_operand:VH 2 "s_register_operand" "w")]
3089     VMAXMINF))]
3090  "TARGET_NEON_FP16INST"
3091  "v<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3092  [(set_attr "type" "neon_fp_minmax_s<q>")]
3093 )
3094
3095 (define_insn "neon_vp<maxmin>fv4hf"
3096  [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3097    (unspec:V4HF
3098     [(match_operand:V4HF 1 "s_register_operand" "w")
3099      (match_operand:V4HF 2 "s_register_operand" "w")]
3100     VPMAXMINF))]
3101  "TARGET_NEON_FP16INST"
3102  "vp<maxmin>.f16\t%P0, %P1, %P2"
3103   [(set_attr "type" "neon_reduc_minmax")]
3104 )
3105
3106 (define_insn "neon_<fmaxmin_op><mode>"
3107  [(set
3108    (match_operand:VH 0 "s_register_operand" "=w")
3109    (unspec:VH
3110     [(match_operand:VH 1 "s_register_operand" "w")
3111      (match_operand:VH 2 "s_register_operand" "w")]
3112     VMAXMINFNM))]
3113  "TARGET_NEON_FP16INST"
3114  "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3115  [(set_attr "type" "neon_fp_minmax_s<q>")]
3116 )
3117
3118 ;; v<maxmin>nm intrinsics.
3119 (define_insn "neon_<fmaxmin_op><mode>"
3120   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3121         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3122                        (match_operand:VCVTF 2 "s_register_operand" "w")]
3123                        VMAXMINFNM))]
3124   "TARGET_NEON && TARGET_VFP5"
3125   "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3126   [(set_attr "type" "neon_fp_minmax_s<q>")]
3127 )
3128
3129 ;; Vector forms for the IEEE-754 fmax()/fmin() functions
3130 (define_insn "<fmaxmin><mode>3"
3131   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3132         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3133                        (match_operand:VCVTF 2 "s_register_operand" "w")]
3134                        VMAXMINFNM))]
3135   "TARGET_NEON && TARGET_VFP5"
3136   "<fmaxmin_op>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3137   [(set_attr "type" "neon_fp_minmax_s<q>")]
3138 )
3139
3140 (define_expand "neon_vpadd<mode>"
3141   [(match_operand:VD 0 "s_register_operand")
3142    (match_operand:VD 1 "s_register_operand")
3143    (match_operand:VD 2 "s_register_operand")]
3144   "TARGET_NEON"
3145 {
3146   emit_insn (gen_neon_vpadd_internal<mode> (operands[0], operands[1],
3147                                             operands[2]));
3148   DONE;
3149 })
3150
3151 (define_insn "neon_vpaddl<sup><mode>"
3152   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3153         (unspec:<V_double_width> [(match_operand:VDQIW 1 "s_register_operand" "w")]
3154                                  VPADDL))]
3155   "TARGET_NEON"
3156   "vpaddl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3157   [(set_attr "type" "neon_reduc_add_long")]
3158 )
3159
3160 (define_insn "neon_vpadal<sup><mode>"
3161   [(set (match_operand:<V_double_width> 0 "s_register_operand" "=w")
3162         (unspec:<V_double_width> [(match_operand:<V_double_width> 1 "s_register_operand" "0")
3163                                   (match_operand:VDQIW 2 "s_register_operand" "w")]
3164                                  VPADAL))]
3165   "TARGET_NEON"
3166   "vpadal.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
3167   [(set_attr "type" "neon_reduc_add_acc")]
3168 )
3169
3170 (define_insn "neon_vp<maxmin><sup><mode>"
3171   [(set (match_operand:VDI 0 "s_register_operand" "=w")
3172         (unspec:VDI [(match_operand:VDI 1 "s_register_operand" "w")
3173                     (match_operand:VDI 2 "s_register_operand" "w")]
3174                    VPMAXMIN))]
3175   "TARGET_NEON"
3176   "vp<maxmin>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3177   [(set_attr "type" "neon_reduc_minmax<q>")]
3178 )
3179
3180 (define_insn "neon_vp<maxmin>f<mode>"
3181   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3182         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3183                     (match_operand:VCVTF 2 "s_register_operand" "w")]
3184                    VPMAXMINF))]
3185   "TARGET_NEON"
3186   "vp<maxmin>.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3187   [(set_attr "type" "neon_fp_reduc_minmax_s<q>")]
3188 )
3189
3190 (define_insn "neon_vrecps<mode>"
3191   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3192         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3193                        (match_operand:VCVTF 2 "s_register_operand" "w")]
3194                       UNSPEC_VRECPS))]
3195   "TARGET_NEON"
3196   "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3197   [(set_attr "type" "neon_fp_recps_s<q>")]
3198 )
3199
3200 (define_insn "neon_vrecps<mode>"
3201   [(set
3202     (match_operand:VH 0 "s_register_operand" "=w")
3203     (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3204                 (match_operand:VH 2 "s_register_operand" "w")]
3205      UNSPEC_VRECPS))]
3206   "TARGET_NEON_FP16INST"
3207   "vrecps.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3208   [(set_attr "type" "neon_fp_recps_s<q>")]
3209 )
3210
3211 (define_insn "neon_vrsqrts<mode>"
3212   [(set (match_operand:VCVTF 0 "s_register_operand" "=w")
3213         (unspec:VCVTF [(match_operand:VCVTF 1 "s_register_operand" "w")
3214                        (match_operand:VCVTF 2 "s_register_operand" "w")]
3215                       UNSPEC_VRSQRTS))]
3216   "TARGET_NEON"
3217   "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3218   [(set_attr "type" "neon_fp_rsqrts_s<q>")]
3219 )
3220
3221 (define_insn "neon_vrsqrts<mode>"
3222   [(set
3223     (match_operand:VH 0 "s_register_operand" "=w")
3224     (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
3225                  (match_operand:VH 2 "s_register_operand" "w")]
3226      UNSPEC_VRSQRTS))]
3227  "TARGET_NEON_FP16INST"
3228  "vrsqrts.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
3229  [(set_attr "type" "neon_fp_rsqrts_s<q>")]
3230 )
3231
3232 (define_expand "neon_vabs<mode>"
3233   [(match_operand:VDQW 0 "s_register_operand")
3234    (match_operand:VDQW 1 "s_register_operand")]
3235   "TARGET_NEON"
3236 {
3237   emit_insn (gen_abs<mode>2 (operands[0], operands[1]));
3238   DONE;
3239 })
3240
3241 (define_insn "neon_vqabs<mode>"
3242   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3243         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3244                       UNSPEC_VQABS))]
3245   "TARGET_NEON"
3246   "vqabs.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3247   [(set_attr "type" "neon_qabs<q>")]
3248 )
3249
3250 (define_insn "neon_bswap<mode>"
3251   [(set (match_operand:VDQHSD 0 "register_operand" "=w")
3252         (bswap:VDQHSD (match_operand:VDQHSD 1 "register_operand" "w")))]
3253   "TARGET_NEON"
3254   "vrev<V_sz_elem>.8\\t%<V_reg>0, %<V_reg>1"
3255   [(set_attr "type" "neon_rev<q>")]
3256 )
3257
3258 (define_expand "neon_vneg<mode>"
3259   [(match_operand:VDQW 0 "s_register_operand")
3260    (match_operand:VDQW 1 "s_register_operand")]
3261   "TARGET_NEON"
3262 {
3263   emit_insn (gen_neg<mode>2 (operands[0], operands[1]));
3264   DONE;
3265 })
3266
3267
3268 ;; The vcadd and vcmla patterns are made UNSPEC for the explicitly due to the
3269 ;; fact that their usage need to guarantee that the source vectors are
3270 ;; contiguous.  It would be wrong to describe the operation without being able
3271 ;; to describe the permute that is also required, but even if that is done
3272 ;; the permute would have been created as a LOAD_LANES which means the values
3273 ;; in the registers are in the wrong order.
3274 (define_insn "neon_vcadd<rot><mode>"
3275   [(set (match_operand:VF 0 "register_operand" "=w")
3276         (unspec:VF [(match_operand:VF 1 "register_operand" "w")
3277                     (match_operand:VF 2 "register_operand" "w")]
3278                     VCADD))]
3279   "TARGET_COMPLEX"
3280   "vcadd.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, #<rot>"
3281   [(set_attr "type" "neon_fcadd")]
3282 )
3283
3284 (define_insn "neon_vcmla<rot><mode>"
3285   [(set (match_operand:VF 0 "register_operand" "=w")
3286         (plus:VF (match_operand:VF 1 "register_operand" "0")
3287                  (unspec:VF [(match_operand:VF 2 "register_operand" "w")
3288                              (match_operand:VF 3 "register_operand" "w")]
3289                              VCMLA)))]
3290   "TARGET_COMPLEX"
3291   "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, %<V_reg>3, #<rot>"
3292   [(set_attr "type" "neon_fcmla")]
3293 )
3294
3295 (define_insn "neon_vcmla_lane<rot><mode>"
3296   [(set (match_operand:VF 0 "s_register_operand" "=w")
3297         (plus:VF (match_operand:VF 1 "s_register_operand" "0")
3298                  (unspec:VF [(match_operand:VF 2 "s_register_operand" "w")
3299                              (match_operand:VF 3 "s_register_operand" "<VF_constraint>")
3300                              (match_operand:SI 4 "const_int_operand" "n")]
3301                              VCMLA)))]
3302   "TARGET_COMPLEX"
3303   {
3304     operands = neon_vcmla_lane_prepare_operands (operands);
3305     return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3306   }
3307   [(set_attr "type" "neon_fcmla")]
3308 )
3309
3310 (define_insn "neon_vcmla_laneq<rot><mode>"
3311   [(set (match_operand:VDF 0 "s_register_operand" "=w")
3312         (plus:VDF (match_operand:VDF 1 "s_register_operand" "0")
3313                   (unspec:VDF [(match_operand:VDF 2 "s_register_operand" "w")
3314                               (match_operand:<V_DOUBLE> 3 "s_register_operand" "<VF_constraint>")
3315                               (match_operand:SI 4 "const_int_operand" "n")]
3316                               VCMLA)))]
3317   "TARGET_COMPLEX"
3318   {
3319     operands = neon_vcmla_lane_prepare_operands (operands);
3320     return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3321   }
3322   [(set_attr "type" "neon_fcmla")]
3323 )
3324
3325 (define_insn "neon_vcmlaq_lane<rot><mode>"
3326   [(set (match_operand:VQ_HSF 0 "s_register_operand" "=w")
3327         (plus:VQ_HSF (match_operand:VQ_HSF 1 "s_register_operand" "0")
3328                  (unspec:VQ_HSF [(match_operand:VQ_HSF 2 "s_register_operand" "w")
3329                                  (match_operand:<V_HALF> 3 "s_register_operand" "<VF_constraint>")
3330                                  (match_operand:SI 4 "const_int_operand" "n")]
3331                                  VCMLA)))]
3332   "TARGET_COMPLEX"
3333   {
3334     operands = neon_vcmla_lane_prepare_operands (operands);
3335     return "vcmla.<V_s_elem>\t%<V_reg>0, %<V_reg>2, d%c3[%c4], #<rot>";
3336   }
3337   [(set_attr "type" "neon_fcmla")]
3338 )
3339
3340
3341 ;; These instructions map to the __builtins for the Dot Product operations.
3342 (define_insn "neon_<sup>dot<vsi2qi>"
3343   [(set (match_operand:VCVTI 0 "register_operand" "=w")
3344         (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3345                     (unspec:VCVTI [(match_operand:<VSI2QI> 2
3346                                                         "register_operand" "w")
3347                                    (match_operand:<VSI2QI> 3
3348                                                         "register_operand" "w")]
3349                 DOTPROD)))]
3350   "TARGET_DOTPROD"
3351   "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %<V_reg>3"
3352   [(set_attr "type" "neon_dot<q>")]
3353 )
3354
3355 ;; These instructions map to the __builtins for the Dot Product
3356 ;; indexed operations.
3357 (define_insn "neon_<sup>dot_lane<vsi2qi>"
3358   [(set (match_operand:VCVTI 0 "register_operand" "=w")
3359         (plus:VCVTI (match_operand:VCVTI 1 "register_operand" "0")
3360                     (unspec:VCVTI [(match_operand:<VSI2QI> 2
3361                                                         "register_operand" "w")
3362                                    (match_operand:V8QI 3 "register_operand" "t")
3363                                    (match_operand:SI 4 "immediate_operand" "i")]
3364                 DOTPROD)))]
3365   "TARGET_DOTPROD"
3366   {
3367     operands[4]
3368       = GEN_INT (NEON_ENDIAN_LANE_N (V8QImode, INTVAL (operands[4])));
3369     return "v<sup>dot.<opsuffix>\\t%<V_reg>0, %<V_reg>2, %P3[%c4]";
3370   }
3371   [(set_attr "type" "neon_dot<q>")]
3372 )
3373
3374 ;; These expands map to the Dot Product optab the vectorizer checks for.
3375 ;; The auto-vectorizer expects a dot product builtin that also does an
3376 ;; accumulation into the provided register.
3377 ;; Given the following pattern
3378 ;;
3379 ;; for (i=0; i<len; i++) {
3380 ;;     c = a[i] * b[i];
3381 ;;     r += c;
3382 ;; }
3383 ;; return result;
3384 ;;
3385 ;; This can be auto-vectorized to
3386 ;; r  = a[0]*b[0] + a[1]*b[1] + a[2]*b[2] + a[3]*b[3];
3387 ;;
3388 ;; given enough iterations.  However the vectorizer can keep unrolling the loop
3389 ;; r += a[4]*b[4] + a[5]*b[5] + a[6]*b[6] + a[7]*b[7];
3390 ;; r += a[8]*b[8] + a[9]*b[9] + a[10]*b[10] + a[11]*b[11];
3391 ;; ...
3392 ;;
3393 ;; and so the vectorizer provides r, in which the result has to be accumulated.
3394 (define_expand "<sup>dot_prod<vsi2qi>"
3395   [(set (match_operand:VCVTI 0 "register_operand")
3396         (plus:VCVTI (unspec:VCVTI [(match_operand:<VSI2QI> 1
3397                                                         "register_operand")
3398                                    (match_operand:<VSI2QI> 2
3399                                                         "register_operand")]
3400                      DOTPROD)
3401                     (match_operand:VCVTI 3 "register_operand")))]
3402   "TARGET_DOTPROD"
3403 {
3404   emit_insn (
3405     gen_neon_<sup>dot<vsi2qi> (operands[3], operands[3], operands[1],
3406                                  operands[2]));
3407   emit_insn (gen_rtx_SET (operands[0], operands[3]));
3408   DONE;
3409 })
3410
3411 (define_expand "neon_copysignf<mode>"
3412   [(match_operand:VCVTF 0 "register_operand")
3413    (match_operand:VCVTF 1 "register_operand")
3414    (match_operand:VCVTF 2 "register_operand")]
3415   "TARGET_NEON"
3416   "{
3417      rtx v_bitmask_cast;
3418      rtx v_bitmask = gen_reg_rtx (<VCVTF:V_cmp_result>mode);
3419      rtx c = gen_int_mode (0x80000000, SImode);
3420
3421      emit_move_insn (v_bitmask,
3422                      gen_const_vec_duplicate (<VCVTF:V_cmp_result>mode, c));
3423      emit_move_insn (operands[0], operands[2]);
3424      v_bitmask_cast = simplify_gen_subreg (<MODE>mode, v_bitmask,
3425                                            <VCVTF:V_cmp_result>mode, 0);
3426      emit_insn (gen_neon_vbsl<mode> (operands[0], v_bitmask_cast, operands[0],
3427                                      operands[1]));
3428
3429      DONE;
3430   }"
3431 )
3432
3433 (define_insn "neon_vqneg<mode>"
3434   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3435         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3436                       UNSPEC_VQNEG))]
3437   "TARGET_NEON"
3438   "vqneg.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3439   [(set_attr "type" "neon_qneg<q>")]
3440 )
3441
3442 (define_insn "neon_vcls<mode>"
3443   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3444         (unspec:VDQIW [(match_operand:VDQIW 1 "s_register_operand" "w")]
3445                       UNSPEC_VCLS))]
3446   "TARGET_NEON"
3447   "vcls.<V_s_elem>\t%<V_reg>0, %<V_reg>1"
3448   [(set_attr "type" "neon_cls<q>")]
3449 )
3450
3451 (define_insn "clz<mode>2"
3452   [(set (match_operand:VDQIW 0 "s_register_operand" "=w")
3453         (clz:VDQIW (match_operand:VDQIW 1 "s_register_operand" "w")))]
3454   "TARGET_NEON"
3455   "vclz.<V_if_elem>\t%<V_reg>0, %<V_reg>1"
3456   [(set_attr "type" "neon_cnt<q>")]
3457 )
3458
3459 (define_expand "neon_vclz<mode>"
3460   [(match_operand:VDQIW 0 "s_register_operand")
3461    (match_operand:VDQIW 1 "s_register_operand")]
3462   "TARGET_NEON"
3463 {
3464   emit_insn (gen_clz<mode>2 (operands[0], operands[1]));
3465   DONE;
3466 })
3467
3468 (define_insn "popcount<mode>2"
3469   [(set (match_operand:VE 0 "s_register_operand" "=w")
3470         (popcount:VE (match_operand:VE 1 "s_register_operand" "w")))]
3471   "TARGET_NEON"
3472   "vcnt.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
3473   [(set_attr "type" "neon_cnt<q>")]
3474 )
3475
3476 (define_expand "neon_vcnt<mode>"
3477   [(match_operand:VE 0 "s_register_operand")
3478    (match_operand:VE 1 "s_register_operand")]
3479   "TARGET_NEON"
3480 {
3481   emit_insn (gen_popcount<mode>2 (operands[0], operands[1]));
3482   DONE;
3483 })
3484
3485 (define_insn "neon_vrecpe<mode>"
3486   [(set (match_operand:VH 0 "s_register_operand" "=w")
3487         (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")]
3488                    UNSPEC_VRECPE))]
3489   "TARGET_NEON_FP16INST"
3490   "vrecpe.f16\t%<V_reg>0, %<V_reg>1"
3491   [(set_attr "type" "neon_fp_recpe_s<q>")]
3492 )
3493
3494 (define_insn "neon_vrecpe<mode>"
3495   [(set (match_operand:V32 0 "s_register_operand" "=w")
3496         (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3497                     UNSPEC_VRECPE))]
3498   "TARGET_NEON"
3499   "vrecpe.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3500   [(set_attr "type" "neon_fp_recpe_s<q>")]
3501 )
3502
3503 (define_insn "neon_vrsqrte<mode>"
3504   [(set (match_operand:V32 0 "s_register_operand" "=w")
3505         (unspec:V32 [(match_operand:V32 1 "s_register_operand" "w")]
3506                     UNSPEC_VRSQRTE))]
3507   "TARGET_NEON"
3508   "vrsqrte.<V_u_elem>\t%<V_reg>0, %<V_reg>1"
3509   [(set_attr "type" "neon_fp_rsqrte_s<q>")]
3510 )
3511
3512 (define_expand "neon_vmvn<mode>"
3513   [(match_operand:VDQIW 0 "s_register_operand")
3514    (match_operand:VDQIW 1 "s_register_operand")]
3515   "TARGET_NEON"
3516 {
3517   emit_insn (gen_one_cmpl<mode>2 (operands[0], operands[1]));
3518   DONE;
3519 })
3520
3521 (define_insn "neon_vget_lane<mode>_sext_internal"
3522   [(set (match_operand:SI 0 "s_register_operand" "=r")
3523         (sign_extend:SI
3524           (vec_select:<V_elem>
3525             (match_operand:VD 1 "s_register_operand" "w")
3526             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3527   "TARGET_NEON"
3528 {
3529   if (BYTES_BIG_ENDIAN)
3530     {
3531       int elt = INTVAL (operands[2]);
3532       elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3533       operands[2] = GEN_INT (elt);
3534     }
3535   return "vmov.s<V_sz_elem>\t%0, %P1[%c2]";
3536 }
3537   [(set_attr "type" "neon_to_gp")]
3538 )
3539
3540 (define_insn "neon_vget_lane<mode>_zext_internal"
3541   [(set (match_operand:SI 0 "s_register_operand" "=r")
3542         (zero_extend:SI
3543           (vec_select:<V_elem>
3544             (match_operand:VD 1 "s_register_operand" "w")
3545             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3546   "TARGET_NEON"
3547 {
3548   if (BYTES_BIG_ENDIAN)
3549     {
3550       int elt = INTVAL (operands[2]);
3551       elt = GET_MODE_NUNITS (<MODE>mode) - 1 - elt;
3552       operands[2] = GEN_INT (elt);
3553     }
3554   return "vmov.u<V_sz_elem>\t%0, %P1[%c2]";
3555 }
3556   [(set_attr "type" "neon_to_gp")]
3557 )
3558
3559 (define_insn "neon_vget_lane<mode>_sext_internal"
3560   [(set (match_operand:SI 0 "s_register_operand" "=r")
3561         (sign_extend:SI
3562           (vec_select:<V_elem>
3563             (match_operand:VQ2 1 "s_register_operand" "w")
3564             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3565   "TARGET_NEON"
3566 {
3567   rtx ops[3];
3568   int regno = REGNO (operands[1]);
3569   unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3570   unsigned int elt = INTVAL (operands[2]);
3571   unsigned int elt_adj = elt % halfelts;
3572
3573   if (BYTES_BIG_ENDIAN)
3574     elt_adj = halfelts - 1 - elt_adj;
3575
3576   ops[0] = operands[0];
3577   ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3578   ops[2] = GEN_INT (elt_adj);
3579   output_asm_insn ("vmov.s<V_sz_elem>\t%0, %P1[%c2]", ops);
3580
3581   return "";
3582 }
3583   [(set_attr "type" "neon_to_gp_q")]
3584 )
3585
3586 (define_insn "neon_vget_lane<mode>_zext_internal"
3587   [(set (match_operand:SI 0 "s_register_operand" "=r")
3588         (zero_extend:SI
3589           (vec_select:<V_elem>
3590             (match_operand:VQ2 1 "s_register_operand" "w")
3591             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3592   "TARGET_NEON"
3593 {
3594   rtx ops[3];
3595   int regno = REGNO (operands[1]);
3596   unsigned int halfelts = GET_MODE_NUNITS (<MODE>mode) / 2;
3597   unsigned int elt = INTVAL (operands[2]);
3598   unsigned int elt_adj = elt % halfelts;
3599
3600   if (BYTES_BIG_ENDIAN)
3601     elt_adj = halfelts - 1 - elt_adj;
3602
3603   ops[0] = operands[0];
3604   ops[1] = gen_rtx_REG (<V_HALF>mode, regno + 2 * (elt / halfelts));
3605   ops[2] = GEN_INT (elt_adj);
3606   output_asm_insn ("vmov.u<V_sz_elem>\t%0, %P1[%c2]", ops);
3607
3608   return "";
3609 }
3610   [(set_attr "type" "neon_to_gp_q")]
3611 )
3612
3613 (define_expand "neon_vget_lane<mode>"
3614   [(match_operand:<V_ext> 0 "s_register_operand")
3615    (match_operand:VDQW 1 "s_register_operand")
3616    (match_operand:SI 2 "immediate_operand")]
3617   "TARGET_NEON"
3618 {
3619   if (BYTES_BIG_ENDIAN)
3620     {
3621       /* The intrinsics are defined in terms of a model where the
3622          element ordering in memory is vldm order, whereas the generic
3623          RTL is defined in terms of a model where the element ordering
3624          in memory is array order.  Convert the lane number to conform
3625          to this model.  */
3626       unsigned int elt = INTVAL (operands[2]);
3627       unsigned int reg_nelts
3628         = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3629       elt ^= reg_nelts - 1;
3630       operands[2] = GEN_INT (elt);
3631     }
3632
3633   if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3634     emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3635                                                 operands[2]));
3636   else
3637     emit_insn (gen_neon_vget_lane<mode>_sext_internal (operands[0],
3638                                                        operands[1],
3639                                                        operands[2]));
3640   DONE;
3641 })
3642
3643 (define_expand "neon_vget_laneu<mode>"
3644   [(match_operand:<V_ext> 0 "s_register_operand")
3645    (match_operand:VDQIW 1 "s_register_operand")
3646    (match_operand:SI 2 "immediate_operand")]
3647   "TARGET_NEON"
3648 {
3649   if (BYTES_BIG_ENDIAN)
3650     {
3651       /* The intrinsics are defined in terms of a model where the
3652          element ordering in memory is vldm order, whereas the generic
3653          RTL is defined in terms of a model where the element ordering
3654          in memory is array order.  Convert the lane number to conform
3655          to this model.  */
3656       unsigned int elt = INTVAL (operands[2]);
3657       unsigned int reg_nelts
3658         = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3659       elt ^= reg_nelts - 1;
3660       operands[2] = GEN_INT (elt);
3661     }
3662
3663   if (GET_MODE_UNIT_BITSIZE (<MODE>mode) == 32)
3664     emit_insn (gen_vec_extract<mode><V_elem_l> (operands[0], operands[1],
3665                                                 operands[2]));
3666   else
3667     emit_insn (gen_neon_vget_lane<mode>_zext_internal (operands[0],
3668                                                        operands[1],
3669                                                        operands[2]));
3670   DONE;
3671 })
3672
3673 (define_expand "neon_vget_lanedi"
3674   [(match_operand:DI 0 "s_register_operand")
3675    (match_operand:DI 1 "s_register_operand")
3676    (match_operand:SI 2 "immediate_operand")]
3677   "TARGET_NEON"
3678 {
3679   emit_move_insn (operands[0], operands[1]);
3680   DONE;
3681 })
3682
3683 (define_expand "neon_vget_lanev2di"
3684   [(match_operand:DI 0 "s_register_operand")
3685    (match_operand:V2DI 1 "s_register_operand")
3686    (match_operand:SI 2 "immediate_operand")]
3687   "TARGET_NEON"
3688 {
3689   int lane;
3690
3691 if (BYTES_BIG_ENDIAN)
3692     {
3693       /* The intrinsics are defined in terms of a model where the
3694          element ordering in memory is vldm order, whereas the generic
3695          RTL is defined in terms of a model where the element ordering
3696          in memory is array order.  Convert the lane number to conform
3697          to this model.  */
3698       unsigned int elt = INTVAL (operands[2]);
3699       unsigned int reg_nelts = 2;
3700       elt ^= reg_nelts - 1;
3701       operands[2] = GEN_INT (elt);
3702     }
3703
3704   lane = INTVAL (operands[2]);
3705   gcc_assert ((lane ==0) || (lane == 1));
3706   emit_move_insn (operands[0], lane == 0
3707                                 ? gen_lowpart (DImode, operands[1])
3708                                 : gen_highpart (DImode, operands[1]));
3709   DONE;
3710 })
3711
3712 (define_expand "neon_vset_lane<mode>"
3713   [(match_operand:VDQ 0 "s_register_operand")
3714    (match_operand:<V_elem> 1 "s_register_operand")
3715    (match_operand:VDQ 2 "s_register_operand")
3716    (match_operand:SI 3 "immediate_operand")]
3717   "TARGET_NEON"
3718 {
3719   unsigned int elt = INTVAL (operands[3]);
3720
3721   if (BYTES_BIG_ENDIAN)
3722     {
3723       unsigned int reg_nelts
3724         = 64 / GET_MODE_UNIT_BITSIZE (<MODE>mode);
3725       elt ^= reg_nelts - 1;
3726     }
3727
3728   emit_insn (gen_vec_set<mode>_internal (operands[0], operands[1],
3729                                          GEN_INT (1 << elt), operands[2]));
3730   DONE;
3731 })
3732
3733 ; See neon_vget_lanedi comment for reasons operands 2 & 3 are ignored.
3734
3735 (define_expand "neon_vset_lanedi"
3736   [(match_operand:DI 0 "s_register_operand")
3737    (match_operand:DI 1 "s_register_operand")
3738    (match_operand:DI 2 "s_register_operand")
3739    (match_operand:SI 3 "immediate_operand")]
3740   "TARGET_NEON"
3741 {
3742   emit_move_insn (operands[0], operands[1]);
3743   DONE;
3744 })
3745
3746 (define_expand "neon_vcreate<mode>"
3747   [(match_operand:VD_RE 0 "s_register_operand")
3748    (match_operand:DI 1 "general_operand")]
3749   "TARGET_NEON"
3750 {
3751   rtx src = gen_lowpart (<MODE>mode, operands[1]);
3752   emit_move_insn (operands[0], src);
3753   DONE;
3754 })
3755
3756 (define_insn "neon_vdup_n<mode>"
3757   [(set (match_operand:VX 0 "s_register_operand" "=w")
3758         (vec_duplicate:VX (match_operand:<V_elem> 1 "s_register_operand" "r")))]
3759   "TARGET_NEON"
3760   "vdup.<V_sz_elem>\t%<V_reg>0, %1"
3761   [(set_attr "type" "neon_from_gp<q>")]
3762 )
3763
3764 (define_insn "neon_vdup_nv4hf"
3765   [(set (match_operand:V4HF 0 "s_register_operand" "=w")
3766         (vec_duplicate:V4HF (match_operand:HF 1 "s_register_operand" "r")))]
3767   "TARGET_NEON"
3768   "vdup.16\t%P0, %1"
3769   [(set_attr "type" "neon_from_gp")]
3770 )
3771
3772 (define_insn "neon_vdup_nv8hf"
3773   [(set (match_operand:V8HF 0 "s_register_operand" "=w")
3774         (vec_duplicate:V8HF (match_operand:HF 1 "s_register_operand" "r")))]
3775   "TARGET_NEON"
3776   "vdup.16\t%q0, %1"
3777   [(set_attr "type" "neon_from_gp_q")]
3778 )
3779
3780 (define_insn "neon_vdup_n<mode>"
3781   [(set (match_operand:V32 0 "s_register_operand" "=w,w")
3782         (vec_duplicate:V32 (match_operand:<V_elem> 1 "s_register_operand" "r,t")))]
3783   "TARGET_NEON"
3784   "@
3785   vdup.<V_sz_elem>\t%<V_reg>0, %1
3786   vdup.<V_sz_elem>\t%<V_reg>0, %y1"
3787   [(set_attr "type" "neon_from_gp<q>,neon_dup<q>")]
3788 )
3789
3790 (define_expand "neon_vdup_ndi"
3791   [(match_operand:DI 0 "s_register_operand")
3792    (match_operand:DI 1 "s_register_operand")]
3793   "TARGET_NEON"
3794 {
3795   emit_move_insn (operands[0], operands[1]);
3796   DONE;
3797 }
3798 )
3799
3800 (define_insn "neon_vdup_nv2di"
3801   [(set (match_operand:V2DI 0 "s_register_operand" "=w,w")
3802         (vec_duplicate:V2DI (match_operand:DI 1 "s_register_operand" "r,w")))]
3803   "TARGET_NEON"
3804   "@
3805   vmov\t%e0, %Q1, %R1\;vmov\t%f0, %Q1, %R1
3806   vmov\t%e0, %P1\;vmov\t%f0, %P1"
3807   [(set_attr "length" "8")
3808    (set_attr "type" "multiple")]
3809 )
3810
3811 (define_insn "neon_vdup_lane<mode>_internal"
3812   [(set (match_operand:VDQW 0 "s_register_operand" "=w")
3813         (vec_duplicate:VDQW
3814           (vec_select:<V_elem>
3815             (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3816             (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3817   "TARGET_NEON"
3818 {
3819   if (BYTES_BIG_ENDIAN)
3820     {
3821       int elt = INTVAL (operands[2]);
3822       elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3823       operands[2] = GEN_INT (elt);
3824     }
3825   if (<Is_d_reg>)
3826     return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3827   else
3828     return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3829 }
3830   [(set_attr "type" "neon_dup<q>")]
3831 )
3832
3833 (define_insn "neon_vdup_lane<mode>_internal"
3834  [(set (match_operand:VH 0 "s_register_operand" "=w")
3835    (vec_duplicate:VH
3836     (vec_select:<V_elem>
3837      (match_operand:<V_double_vector_mode> 1 "s_register_operand" "w")
3838      (parallel [(match_operand:SI 2 "immediate_operand" "i")]))))]
3839  "TARGET_NEON && TARGET_FP16"
3840 {
3841   if (BYTES_BIG_ENDIAN)
3842     {
3843       int elt = INTVAL (operands[2]);
3844       elt = GET_MODE_NUNITS (<V_double_vector_mode>mode) - 1 - elt;
3845       operands[2] = GEN_INT (elt);
3846     }
3847   if (<Is_d_reg>)
3848     return "vdup.<V_sz_elem>\t%P0, %P1[%c2]";
3849   else
3850     return "vdup.<V_sz_elem>\t%q0, %P1[%c2]";
3851 }
3852   [(set_attr "type" "neon_dup<q>")]
3853 )
3854
3855 (define_expand "neon_vdup_lane<mode>"
3856   [(match_operand:VDQW 0 "s_register_operand")
3857    (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3858    (match_operand:SI 2 "immediate_operand")]
3859   "TARGET_NEON"
3860 {
3861   if (BYTES_BIG_ENDIAN)
3862     {
3863       unsigned int elt = INTVAL (operands[2]);
3864       unsigned int reg_nelts
3865         = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3866       elt ^= reg_nelts - 1;
3867       operands[2] = GEN_INT (elt);
3868     }
3869     emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3870                                                   operands[2]));
3871     DONE;
3872 })
3873
3874 (define_expand "neon_vdup_lane<mode>"
3875   [(match_operand:VH 0 "s_register_operand")
3876    (match_operand:<V_double_vector_mode> 1 "s_register_operand")
3877    (match_operand:SI 2 "immediate_operand")]
3878   "TARGET_NEON && TARGET_FP16"
3879 {
3880   if (BYTES_BIG_ENDIAN)
3881     {
3882       unsigned int elt = INTVAL (operands[2]);
3883       unsigned int reg_nelts
3884         = 64 / GET_MODE_UNIT_BITSIZE (<V_double_vector_mode>mode);
3885       elt ^= reg_nelts - 1;
3886       operands[2] = GEN_INT (elt);
3887     }
3888   emit_insn (gen_neon_vdup_lane<mode>_internal (operands[0], operands[1],
3889                                                 operands[2]));
3890   DONE;
3891 })
3892
3893 ; Scalar index is ignored, since only zero is valid here.
3894 (define_expand "neon_vdup_lanedi"
3895   [(match_operand:DI 0 "s_register_operand")
3896    (match_operand:DI 1 "s_register_operand")
3897    (match_operand:SI 2 "immediate_operand")]
3898   "TARGET_NEON"
3899 {
3900   emit_move_insn (operands[0], operands[1]);
3901   DONE;
3902 })
3903
3904 ; Likewise for v2di, as the DImode second operand has only a single element.
3905 (define_expand "neon_vdup_lanev2di"
3906   [(match_operand:V2DI 0 "s_register_operand")
3907    (match_operand:DI 1 "s_register_operand")
3908    (match_operand:SI 2 "immediate_operand")]
3909   "TARGET_NEON"
3910 {
3911   emit_insn (gen_neon_vdup_nv2di (operands[0], operands[1]));
3912   DONE;
3913 })
3914
3915 ; Disabled before reload because we don't want combine doing something silly,
3916 ; but used by the post-reload expansion of neon_vcombine.
3917 (define_insn "*neon_vswp<mode>"
3918   [(set (match_operand:VDQX 0 "s_register_operand" "+w")
3919         (match_operand:VDQX 1 "s_register_operand" "+w"))
3920    (set (match_dup 1) (match_dup 0))]
3921   "TARGET_NEON && reload_completed"
3922   "vswp\t%<V_reg>0, %<V_reg>1"
3923   [(set_attr "type" "neon_permute<q>")]
3924 )
3925
3926 ;; In this insn, operand 1 should be low, and operand 2 the high part of the
3927 ;; dest vector.
3928 ;; FIXME: A different implementation of this builtin could make it much
3929 ;; more likely that we wouldn't actually need to output anything (we could make
3930 ;; it so that the reg allocator puts things in the right places magically
3931 ;; instead). Lack of subregs for vectors makes that tricky though, I think.
3932
3933 (define_insn_and_split "neon_vcombine<mode>"
3934   [(set (match_operand:<V_DOUBLE> 0 "s_register_operand" "=w")
3935         (vec_concat:<V_DOUBLE>
3936           (match_operand:VDX 1 "s_register_operand" "w")
3937           (match_operand:VDX 2 "s_register_operand" "w")))]
3938   "TARGET_NEON"
3939   "#"
3940   "&& reload_completed"
3941   [(const_int 0)]
3942 {
3943   neon_split_vcombine (operands);
3944   DONE;
3945 }
3946 [(set_attr "type" "multiple")]
3947 )
3948
3949 (define_expand "neon_vget_high<mode>"
3950   [(match_operand:<V_HALF> 0 "s_register_operand")
3951    (match_operand:VQX 1 "s_register_operand")]
3952   "TARGET_NEON"
3953 {
3954   emit_move_insn (operands[0],
3955                   simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
3956                                        GET_MODE_SIZE (<V_HALF>mode)));
3957   DONE;
3958 })
3959
3960 (define_expand "neon_vget_low<mode>"
3961   [(match_operand:<V_HALF> 0 "s_register_operand")
3962    (match_operand:VQX 1 "s_register_operand")]
3963   "TARGET_NEON"
3964 {
3965   emit_move_insn (operands[0],
3966                   simplify_gen_subreg (<V_HALF>mode, operands[1],
3967                                        <MODE>mode, 0));
3968   DONE;
3969 })
3970
3971 (define_insn "float<mode><V_cvtto>2"
3972   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3973         (float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3974   "TARGET_NEON && !flag_rounding_math"
3975   "vcvt.f32.s32\t%<V_reg>0, %<V_reg>1"
3976   [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3977 )
3978
3979 (define_insn "floatuns<mode><V_cvtto>2"
3980   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3981         (unsigned_float:<V_CVTTO> (match_operand:VCVTI 1 "s_register_operand" "w")))]
3982   "TARGET_NEON && !flag_rounding_math"
3983   "vcvt.f32.u32\t%<V_reg>0, %<V_reg>1"
3984   [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
3985 )
3986
3987 (define_insn "fix_trunc<mode><V_cvtto>2"
3988   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3989         (fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3990   "TARGET_NEON"
3991   "vcvt.s32.f32\t%<V_reg>0, %<V_reg>1"
3992   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
3993 )
3994
3995 (define_insn "fixuns_trunc<mode><V_cvtto>2"
3996   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
3997         (unsigned_fix:<V_CVTTO> (match_operand:VCVTF 1 "s_register_operand" "w")))]
3998   "TARGET_NEON"
3999   "vcvt.u32.f32\t%<V_reg>0, %<V_reg>1"
4000   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4001 )
4002
4003 (define_insn "neon_vcvt<sup><mode>"
4004   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4005         (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")]
4006                           VCVT_US))]
4007   "TARGET_NEON"
4008   "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1"
4009   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4010 )
4011
4012 (define_insn "neon_vcvt<sup><mode>"
4013   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4014         (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")]
4015                           VCVT_US))]
4016   "TARGET_NEON"
4017   "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1"
4018   [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4019 )
4020
4021 (define_insn "neon_vcvtv4sfv4hf"
4022   [(set (match_operand:V4SF 0 "s_register_operand" "=w")
4023         (unspec:V4SF [(match_operand:V4HF 1 "s_register_operand" "w")]
4024                           UNSPEC_VCVT))]
4025   "TARGET_NEON && TARGET_FP16"
4026   "vcvt.f32.f16\t%q0, %P1"
4027   [(set_attr "type" "neon_fp_cvt_widen_h")]
4028 )
4029
4030 (define_insn "neon_vcvtv4hfv4sf"
4031   [(set (match_operand:V4HF 0 "s_register_operand" "=w")
4032         (unspec:V4HF [(match_operand:V4SF 1 "s_register_operand" "w")]
4033                           UNSPEC_VCVT))]
4034   "TARGET_NEON && TARGET_FP16"
4035   "vcvt.f16.f32\t%P0, %q1"
4036   [(set_attr "type" "neon_fp_cvt_narrow_s_q")]
4037 )
4038
4039 (define_insn "neon_vcvt<sup><mode>"
4040  [(set
4041    (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4042    (unspec:<VH_CVTTO>
4043     [(match_operand:VCVTHI 1 "s_register_operand" "w")]
4044     VCVT_US))]
4045  "TARGET_NEON_FP16INST"
4046  "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1"
4047   [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
4048 )
4049
4050 (define_insn "neon_vcvt<sup><mode>"
4051  [(set
4052    (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4053    (unspec:<VH_CVTTO>
4054     [(match_operand:VH 1 "s_register_operand" "w")]
4055     VCVT_US))]
4056  "TARGET_NEON_FP16INST"
4057  "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4058   [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
4059 )
4060
4061 (define_insn "neon_vcvt<sup>_n<mode>"
4062   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4063         (unspec:<V_CVTTO> [(match_operand:VCVTF 1 "s_register_operand" "w")
4064                            (match_operand:SI 2 "immediate_operand" "i")]
4065                           VCVT_US_N))]
4066   "TARGET_NEON"
4067 {
4068   arm_const_bounds (operands[2], 1, 33);
4069   return "vcvt.<sup>%#32.f32\t%<V_reg>0, %<V_reg>1, %2";
4070 }
4071   [(set_attr "type" "neon_fp_to_int_<V_elem_ch><q>")]
4072 )
4073
4074 (define_insn "neon_vcvt<sup>_n<mode>"
4075  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4076    (unspec:<VH_CVTTO>
4077     [(match_operand:VH 1 "s_register_operand" "w")
4078      (match_operand:SI 2 "immediate_operand" "i")]
4079     VCVT_US_N))]
4080   "TARGET_NEON_FP16INST"
4081 {
4082   arm_const_bounds (operands[2], 0, 17);
4083   return "vcvt.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1, %2";
4084 }
4085  [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
4086 )
4087
4088 (define_insn "neon_vcvt<sup>_n<mode>"
4089   [(set (match_operand:<V_CVTTO> 0 "s_register_operand" "=w")
4090         (unspec:<V_CVTTO> [(match_operand:VCVTI 1 "s_register_operand" "w")
4091                            (match_operand:SI 2 "immediate_operand" "i")]
4092                           VCVT_US_N))]
4093   "TARGET_NEON"
4094 {
4095   arm_const_bounds (operands[2], 1, 33);
4096   return "vcvt.f32.<sup>%#32\t%<V_reg>0, %<V_reg>1, %2";
4097 }
4098   [(set_attr "type" "neon_int_to_fp_<V_elem_ch><q>")]
4099 )
4100
4101 (define_insn "neon_vcvt<sup>_n<mode>"
4102  [(set (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4103    (unspec:<VH_CVTTO>
4104     [(match_operand:VCVTHI 1 "s_register_operand" "w")
4105      (match_operand:SI 2 "immediate_operand" "i")]
4106     VCVT_US_N))]
4107  "TARGET_NEON_FP16INST"
4108 {
4109   arm_const_bounds (operands[2], 0, 17);
4110   return "vcvt.f16.<sup>%#16\t%<V_reg>0, %<V_reg>1, %2";
4111 }
4112  [(set_attr "type" "neon_int_to_fp_<VH_elem_ch><q>")]
4113 )
4114
4115 (define_insn "neon_vcvt<vcvth_op><sup><mode>"
4116  [(set
4117    (match_operand:<VH_CVTTO> 0 "s_register_operand" "=w")
4118    (unspec:<VH_CVTTO>
4119     [(match_operand:VH 1 "s_register_operand" "w")]
4120     VCVT_HF_US))]
4121  "TARGET_NEON_FP16INST"
4122  "vcvt<vcvth_op>.<sup>%#16.f16\t%<V_reg>0, %<V_reg>1"
4123   [(set_attr "type" "neon_fp_to_int_<VH_elem_ch><q>")]
4124 )
4125
4126 (define_insn "neon_vmovn<mode>"
4127   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4128         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4129                            UNSPEC_VMOVN))]
4130   "TARGET_NEON"
4131   "vmovn.<V_if_elem>\t%P0, %q1"
4132   [(set_attr "type" "neon_shift_imm_narrow_q")]
4133 )
4134
4135 (define_insn "neon_vqmovn<sup><mode>"
4136   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4137         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4138                            VQMOVN))]
4139   "TARGET_NEON"
4140   "vqmovn.<sup>%#<V_sz_elem>\t%P0, %q1"
4141   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4142 )
4143
4144 (define_insn "neon_vqmovun<mode>"
4145   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4146         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")]
4147                            UNSPEC_VQMOVUN))]
4148   "TARGET_NEON"
4149   "vqmovun.<V_s_elem>\t%P0, %q1"
4150   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4151 )
4152
4153 (define_insn "neon_vmovl<sup><mode>"
4154   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4155         (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")]
4156                           VMOVL))]
4157   "TARGET_NEON"
4158   "vmovl.<sup>%#<V_sz_elem>\t%q0, %P1"
4159   [(set_attr "type" "neon_shift_imm_long")]
4160 )
4161
4162 (define_insn "neon_vmul_lane<mode>"
4163   [(set (match_operand:VMD 0 "s_register_operand" "=w")
4164         (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "w")
4165                      (match_operand:VMD 2 "s_register_operand"
4166                                         "<scalar_mul_constraint>")
4167                      (match_operand:SI 3 "immediate_operand" "i")]
4168                     UNSPEC_VMUL_LANE))]
4169   "TARGET_NEON"
4170 {
4171   return "vmul.<V_if_elem>\t%P0, %P1, %P2[%c3]";
4172 }
4173   [(set (attr "type")
4174      (if_then_else (match_test "<Is_float_mode>")
4175                    (const_string "neon_fp_mul_s_scalar<q>")
4176                    (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
4177 )
4178
4179 (define_insn "neon_vmul_lane<mode>"
4180   [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4181         (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "w")
4182                      (match_operand:<V_HALF> 2 "s_register_operand"
4183                                              "<scalar_mul_constraint>")
4184                      (match_operand:SI 3 "immediate_operand" "i")]
4185                     UNSPEC_VMUL_LANE))]
4186   "TARGET_NEON"
4187 {
4188   return "vmul.<V_if_elem>\t%q0, %q1, %P2[%c3]";
4189 }
4190   [(set (attr "type")
4191      (if_then_else (match_test "<Is_float_mode>")
4192                    (const_string "neon_fp_mul_s_scalar<q>")
4193                    (const_string "neon_mul_<V_elem_ch>_scalar<q>")))]
4194 )
4195
4196 (define_insn "neon_vmul_lane<mode>"
4197   [(set (match_operand:VH 0 "s_register_operand" "=w")
4198         (unspec:VH [(match_operand:VH 1 "s_register_operand" "w")
4199                     (match_operand:V4HF 2 "s_register_operand"
4200                      "<scalar_mul_constraint>")
4201                      (match_operand:SI 3 "immediate_operand" "i")]
4202                      UNSPEC_VMUL_LANE))]
4203   "TARGET_NEON_FP16INST"
4204   "vmul.f16\t%<V_reg>0, %<V_reg>1, %P2[%c3]"
4205   [(set_attr "type" "neon_fp_mul_s_scalar<q>")]
4206 )
4207
4208 (define_insn "neon_vmull<sup>_lane<mode>"
4209   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4210         (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4211                            (match_operand:VMDI 2 "s_register_operand"
4212                                                "<scalar_mul_constraint>")
4213                            (match_operand:SI 3 "immediate_operand" "i")]
4214                           VMULL_LANE))]
4215   "TARGET_NEON"
4216 {
4217   return "vmull.<sup>%#<V_sz_elem>\t%q0, %P1, %P2[%c3]";
4218 }
4219   [(set_attr "type" "neon_mul_<V_elem_ch>_scalar_long")]
4220 )
4221
4222 (define_insn "neon_vqdmull_lane<mode>"
4223   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4224         (unspec:<V_widen> [(match_operand:VMDI 1 "s_register_operand" "w")
4225                            (match_operand:VMDI 2 "s_register_operand"
4226                                                "<scalar_mul_constraint>")
4227                            (match_operand:SI 3 "immediate_operand" "i")]
4228                           UNSPEC_VQDMULL_LANE))]
4229   "TARGET_NEON"
4230 {
4231   return "vqdmull.<V_s_elem>\t%q0, %P1, %P2[%c3]";
4232 }
4233   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_long")]
4234 )
4235
4236 (define_insn "neon_vq<r>dmulh_lane<mode>"
4237   [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4238         (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "w")
4239                       (match_operand:<V_HALF> 2 "s_register_operand"
4240                                               "<scalar_mul_constraint>")
4241                       (match_operand:SI 3 "immediate_operand" "i")]
4242                       VQDMULH_LANE))]
4243   "TARGET_NEON"
4244 {
4245   return "vq<r>dmulh.<V_s_elem>\t%q0, %q1, %P2[%c3]";
4246 }
4247   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
4248 )
4249
4250 (define_insn "neon_vq<r>dmulh_lane<mode>"
4251   [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4252         (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "w")
4253                       (match_operand:VMDI 2 "s_register_operand"
4254                                           "<scalar_mul_constraint>")
4255                       (match_operand:SI 3 "immediate_operand" "i")]
4256                       VQDMULH_LANE))]
4257   "TARGET_NEON"
4258 {
4259   return "vq<r>dmulh.<V_s_elem>\t%P0, %P1, %P2[%c3]";
4260 }
4261   [(set_attr "type" "neon_sat_mul_<V_elem_ch>_scalar_q")]
4262 )
4263
4264 ;; vqrdmlah_lane, vqrdmlsh_lane
4265 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4266   [(set (match_operand:VMQI 0 "s_register_operand" "=w")
4267         (unspec:VMQI [(match_operand:VMQI 1 "s_register_operand" "0")
4268                       (match_operand:VMQI 2 "s_register_operand" "w")
4269                       (match_operand:<V_HALF> 3 "s_register_operand"
4270                                           "<scalar_mul_constraint>")
4271                       (match_operand:SI 4 "immediate_operand" "i")]
4272                      VQRDMLH_AS))]
4273   "TARGET_NEON_RDMA"
4274 {
4275   return
4276    "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%q0, %q2, %P3[%c4]";
4277 }
4278   [(set_attr "type" "neon_mla_<V_elem_ch>_scalar<q>")]
4279 )
4280
4281 (define_insn "neon_vqrdml<VQRDMLH_AS:neon_rdma_as>h_lane<mode>"
4282   [(set (match_operand:VMDI 0 "s_register_operand" "=w")
4283         (unspec:VMDI [(match_operand:VMDI 1 "s_register_operand" "0")
4284                       (match_operand:VMDI 2 "s_register_operand" "w")
4285                       (match_operand:VMDI 3 "s_register_operand"
4286                                           "<scalar_mul_constraint>")
4287                       (match_operand:SI 4 "immediate_operand" "i")]
4288                      VQRDMLH_AS))]
4289   "TARGET_NEON_RDMA"
4290 {
4291   return
4292    "vqrdml<VQRDMLH_AS:neon_rdma_as>h.<V_s_elem>\t%P0, %P2, %P3[%c4]";
4293 }
4294   [(set_attr "type" "neon_mla_<V_elem_ch>_scalar")]
4295 )
4296
4297 (define_insn "neon_vmla_lane<mode>"
4298   [(set (match_operand:VMD 0 "s_register_operand" "=w")
4299         (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4300                      (match_operand:VMD 2 "s_register_operand" "w")
4301                      (match_operand:VMD 3 "s_register_operand"
4302                                         "<scalar_mul_constraint>")
4303                      (match_operand:SI 4 "immediate_operand" "i")]
4304                      UNSPEC_VMLA_LANE))]
4305   "TARGET_NEON"
4306 {
4307   return "vmla.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4308 }
4309   [(set (attr "type")
4310      (if_then_else (match_test "<Is_float_mode>")
4311                    (const_string "neon_fp_mla_s_scalar<q>")
4312                    (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4313 )
4314
4315 (define_insn "neon_vmla_lane<mode>"
4316   [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4317         (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4318                      (match_operand:VMQ 2 "s_register_operand" "w")
4319                      (match_operand:<V_HALF> 3 "s_register_operand"
4320                                              "<scalar_mul_constraint>")
4321                      (match_operand:SI 4 "immediate_operand" "i")]
4322                      UNSPEC_VMLA_LANE))]
4323   "TARGET_NEON"
4324 {
4325   return "vmla.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4326 }
4327   [(set (attr "type")
4328      (if_then_else (match_test "<Is_float_mode>")
4329                    (const_string "neon_fp_mla_s_scalar<q>")
4330                    (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4331 )
4332
4333 (define_insn "neon_vmlal<sup>_lane<mode>"
4334   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4335         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4336                            (match_operand:VMDI 2 "s_register_operand" "w")
4337                            (match_operand:VMDI 3 "s_register_operand"
4338                                                "<scalar_mul_constraint>")
4339                            (match_operand:SI 4 "immediate_operand" "i")]
4340                           VMLAL_LANE))]
4341   "TARGET_NEON"
4342 {
4343   return "vmlal.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4344 }
4345   [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
4346 )
4347
4348 (define_insn "neon_vqdmlal_lane<mode>"
4349   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4350         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4351                            (match_operand:VMDI 2 "s_register_operand" "w")
4352                            (match_operand:VMDI 3 "s_register_operand"
4353                                                "<scalar_mul_constraint>")
4354                            (match_operand:SI 4 "immediate_operand" "i")]
4355                           UNSPEC_VQDMLAL_LANE))]
4356   "TARGET_NEON"
4357 {
4358   return "vqdmlal.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4359 }
4360   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4361 )
4362
4363 (define_insn "neon_vmls_lane<mode>"
4364   [(set (match_operand:VMD 0 "s_register_operand" "=w")
4365         (unspec:VMD [(match_operand:VMD 1 "s_register_operand" "0")
4366                      (match_operand:VMD 2 "s_register_operand" "w")
4367                      (match_operand:VMD 3 "s_register_operand"
4368                                         "<scalar_mul_constraint>")
4369                      (match_operand:SI 4 "immediate_operand" "i")]
4370                     UNSPEC_VMLS_LANE))]
4371   "TARGET_NEON"
4372 {
4373   return "vmls.<V_if_elem>\t%P0, %P2, %P3[%c4]";
4374 }
4375   [(set (attr "type")
4376      (if_then_else (match_test "<Is_float_mode>")
4377                    (const_string "neon_fp_mla_s_scalar<q>")
4378                    (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4379 )
4380
4381 (define_insn "neon_vmls_lane<mode>"
4382   [(set (match_operand:VMQ 0 "s_register_operand" "=w")
4383         (unspec:VMQ [(match_operand:VMQ 1 "s_register_operand" "0")
4384                      (match_operand:VMQ 2 "s_register_operand" "w")
4385                      (match_operand:<V_HALF> 3 "s_register_operand"
4386                                              "<scalar_mul_constraint>")
4387                      (match_operand:SI 4 "immediate_operand" "i")]
4388                     UNSPEC_VMLS_LANE))]
4389   "TARGET_NEON"
4390 {
4391   return "vmls.<V_if_elem>\t%q0, %q2, %P3[%c4]";
4392 }
4393   [(set (attr "type")
4394      (if_then_else (match_test "<Is_float_mode>")
4395                    (const_string "neon_fp_mla_s_scalar<q>")
4396                    (const_string "neon_mla_<V_elem_ch>_scalar<q>")))]
4397 )
4398
4399 (define_insn "neon_vmlsl<sup>_lane<mode>"
4400   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4401         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4402                            (match_operand:VMDI 2 "s_register_operand" "w")
4403                            (match_operand:VMDI 3 "s_register_operand"
4404                                                "<scalar_mul_constraint>")
4405                            (match_operand:SI 4 "immediate_operand" "i")]
4406                           VMLSL_LANE))]
4407   "TARGET_NEON"
4408 {
4409   return "vmlsl.<sup>%#<V_sz_elem>\t%q0, %P2, %P3[%c4]";
4410 }
4411   [(set_attr "type" "neon_mla_<V_elem_ch>_scalar_long")]
4412 )
4413
4414 (define_insn "neon_vqdmlsl_lane<mode>"
4415   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4416         (unspec:<V_widen> [(match_operand:<V_widen> 1 "s_register_operand" "0")
4417                            (match_operand:VMDI 2 "s_register_operand" "w")
4418                            (match_operand:VMDI 3 "s_register_operand"
4419                                                "<scalar_mul_constraint>")
4420                            (match_operand:SI 4 "immediate_operand" "i")]
4421                           UNSPEC_VQDMLSL_LANE))]
4422   "TARGET_NEON"
4423 {
4424   return "vqdmlsl.<V_s_elem>\t%q0, %P2, %P3[%c4]";
4425 }
4426   [(set_attr "type" "neon_sat_mla_<V_elem_ch>_scalar_long")]
4427 )
4428
4429 ; FIXME: For the "_n" multiply/multiply-accumulate insns, we copy a value in a
4430 ; core register into a temp register, then use a scalar taken from that. This
4431 ; isn't an optimal solution if e.g. the scalar has just been read from memory
4432 ; or extracted from another vector. The latter case it's currently better to
4433 ; use the "_lane" variant, and the former case can probably be implemented
4434 ; using vld1_lane, but that hasn't been done yet.
4435
4436 (define_expand "neon_vmul_n<mode>"
4437   [(match_operand:VMD 0 "s_register_operand")
4438    (match_operand:VMD 1 "s_register_operand")
4439    (match_operand:<V_elem> 2 "s_register_operand")]
4440   "TARGET_NEON"
4441 {
4442   rtx tmp = gen_reg_rtx (<MODE>mode);
4443   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4444   emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4445                                        const0_rtx));
4446   DONE;
4447 })
4448
4449 (define_expand "neon_vmul_n<mode>"
4450   [(match_operand:VMQ 0 "s_register_operand")
4451    (match_operand:VMQ 1 "s_register_operand")
4452    (match_operand:<V_elem> 2 "s_register_operand")]
4453   "TARGET_NEON"
4454 {
4455   rtx tmp = gen_reg_rtx (<V_HALF>mode);
4456   emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4457   emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4458                                        const0_rtx));
4459   DONE;
4460 })
4461
4462 (define_expand "neon_vmul_n<mode>"
4463   [(match_operand:VH 0 "s_register_operand")
4464    (match_operand:VH 1 "s_register_operand")
4465    (match_operand:<V_elem> 2 "s_register_operand")]
4466   "TARGET_NEON_FP16INST"
4467 {
4468   rtx tmp = gen_reg_rtx (V4HFmode);
4469   emit_insn (gen_neon_vset_lanev4hf (tmp, operands[2], tmp, const0_rtx));
4470   emit_insn (gen_neon_vmul_lane<mode> (operands[0], operands[1], tmp,
4471                                        const0_rtx));
4472   DONE;
4473 })
4474
4475 (define_expand "neon_vmulls_n<mode>"
4476   [(match_operand:<V_widen> 0 "s_register_operand")
4477    (match_operand:VMDI 1 "s_register_operand")
4478    (match_operand:<V_elem> 2 "s_register_operand")]
4479   "TARGET_NEON"
4480 {
4481   rtx tmp = gen_reg_rtx (<MODE>mode);
4482   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4483   emit_insn (gen_neon_vmulls_lane<mode> (operands[0], operands[1], tmp,
4484                                          const0_rtx));
4485   DONE;
4486 })
4487
4488 (define_expand "neon_vmullu_n<mode>"
4489   [(match_operand:<V_widen> 0 "s_register_operand")
4490    (match_operand:VMDI 1 "s_register_operand")
4491    (match_operand:<V_elem> 2 "s_register_operand")]
4492   "TARGET_NEON"
4493 {
4494   rtx tmp = gen_reg_rtx (<MODE>mode);
4495   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4496   emit_insn (gen_neon_vmullu_lane<mode> (operands[0], operands[1], tmp,
4497                                          const0_rtx));
4498   DONE;
4499 })
4500
4501 (define_expand "neon_vqdmull_n<mode>"
4502   [(match_operand:<V_widen> 0 "s_register_operand")
4503    (match_operand:VMDI 1 "s_register_operand")
4504    (match_operand:<V_elem> 2 "s_register_operand")]
4505   "TARGET_NEON"
4506 {
4507   rtx tmp = gen_reg_rtx (<MODE>mode);
4508   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4509   emit_insn (gen_neon_vqdmull_lane<mode> (operands[0], operands[1], tmp,
4510                                           const0_rtx));
4511   DONE;
4512 })
4513
4514 (define_expand "neon_vqdmulh_n<mode>"
4515   [(match_operand:VMDI 0 "s_register_operand")
4516    (match_operand:VMDI 1 "s_register_operand")
4517    (match_operand:<V_elem> 2 "s_register_operand")]
4518   "TARGET_NEON"
4519 {
4520   rtx tmp = gen_reg_rtx (<MODE>mode);
4521   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4522   emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
4523                                           const0_rtx));
4524   DONE;
4525 })
4526
4527 (define_expand "neon_vqrdmulh_n<mode>"
4528   [(match_operand:VMDI 0 "s_register_operand")
4529    (match_operand:VMDI 1 "s_register_operand")
4530    (match_operand:<V_elem> 2 "s_register_operand")]
4531   "TARGET_NEON"
4532 {
4533   rtx tmp = gen_reg_rtx (<MODE>mode);
4534   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[2], tmp, const0_rtx));
4535   emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
4536                                           const0_rtx));
4537   DONE;
4538 })
4539
4540 (define_expand "neon_vqdmulh_n<mode>"
4541   [(match_operand:VMQI 0 "s_register_operand")
4542    (match_operand:VMQI 1 "s_register_operand")
4543    (match_operand:<V_elem> 2 "s_register_operand")]
4544   "TARGET_NEON"
4545 {
4546   rtx tmp = gen_reg_rtx (<V_HALF>mode);
4547   emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4548   emit_insn (gen_neon_vqdmulh_lane<mode> (operands[0], operands[1], tmp,
4549                                           const0_rtx));
4550   DONE;
4551 })
4552
4553 (define_expand "neon_vqrdmulh_n<mode>"
4554   [(match_operand:VMQI 0 "s_register_operand")
4555    (match_operand:VMQI 1 "s_register_operand")
4556    (match_operand:<V_elem> 2 "s_register_operand")]
4557   "TARGET_NEON"
4558 {
4559   rtx tmp = gen_reg_rtx (<V_HALF>mode);
4560   emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[2], tmp, const0_rtx));
4561   emit_insn (gen_neon_vqrdmulh_lane<mode> (operands[0], operands[1], tmp,
4562                                            const0_rtx));
4563   DONE;
4564 })
4565
4566 (define_expand "neon_vmla_n<mode>"
4567   [(match_operand:VMD 0 "s_register_operand")
4568    (match_operand:VMD 1 "s_register_operand")
4569    (match_operand:VMD 2 "s_register_operand")
4570    (match_operand:<V_elem> 3 "s_register_operand")]
4571   "TARGET_NEON"
4572 {
4573   rtx tmp = gen_reg_rtx (<MODE>mode);
4574   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4575   emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
4576                                        tmp, const0_rtx));
4577   DONE;
4578 })
4579
4580 (define_expand "neon_vmla_n<mode>"
4581   [(match_operand:VMQ 0 "s_register_operand")
4582    (match_operand:VMQ 1 "s_register_operand")
4583    (match_operand:VMQ 2 "s_register_operand")
4584    (match_operand:<V_elem> 3 "s_register_operand")]
4585   "TARGET_NEON"
4586 {
4587   rtx tmp = gen_reg_rtx (<V_HALF>mode);
4588   emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4589   emit_insn (gen_neon_vmla_lane<mode> (operands[0], operands[1], operands[2],
4590                                        tmp, const0_rtx));
4591   DONE;
4592 })
4593
4594 (define_expand "neon_vmlals_n<mode>"
4595   [(match_operand:<V_widen> 0 "s_register_operand")
4596    (match_operand:<V_widen> 1 "s_register_operand")
4597    (match_operand:VMDI 2 "s_register_operand")
4598    (match_operand:<V_elem> 3 "s_register_operand")]
4599   "TARGET_NEON"
4600 {
4601   rtx tmp = gen_reg_rtx (<MODE>mode);
4602   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4603   emit_insn (gen_neon_vmlals_lane<mode> (operands[0], operands[1], operands[2],
4604                                          tmp, const0_rtx));
4605   DONE;
4606 })
4607
4608 (define_expand "neon_vmlalu_n<mode>"
4609   [(match_operand:<V_widen> 0 "s_register_operand")
4610    (match_operand:<V_widen> 1 "s_register_operand")
4611    (match_operand:VMDI 2 "s_register_operand")
4612    (match_operand:<V_elem> 3 "s_register_operand")]
4613   "TARGET_NEON"
4614 {
4615   rtx tmp = gen_reg_rtx (<MODE>mode);
4616   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4617   emit_insn (gen_neon_vmlalu_lane<mode> (operands[0], operands[1], operands[2],
4618                                          tmp, const0_rtx));
4619   DONE;
4620 })
4621
4622 (define_expand "neon_vqdmlal_n<mode>"
4623   [(match_operand:<V_widen> 0 "s_register_operand")
4624    (match_operand:<V_widen> 1 "s_register_operand")
4625    (match_operand:VMDI 2 "s_register_operand")
4626    (match_operand:<V_elem> 3 "s_register_operand")]
4627   "TARGET_NEON"
4628 {
4629   rtx tmp = gen_reg_rtx (<MODE>mode);
4630   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4631   emit_insn (gen_neon_vqdmlal_lane<mode> (operands[0], operands[1], operands[2],
4632                                           tmp, const0_rtx));
4633   DONE;
4634 })
4635
4636 (define_expand "neon_vmls_n<mode>"
4637   [(match_operand:VMD 0 "s_register_operand")
4638    (match_operand:VMD 1 "s_register_operand")
4639    (match_operand:VMD 2 "s_register_operand")
4640    (match_operand:<V_elem> 3 "s_register_operand")]
4641   "TARGET_NEON"
4642 {
4643   rtx tmp = gen_reg_rtx (<MODE>mode);
4644   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4645   emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
4646                                        tmp, const0_rtx));
4647   DONE;
4648 })
4649
4650 (define_expand "neon_vmls_n<mode>"
4651   [(match_operand:VMQ 0 "s_register_operand")
4652    (match_operand:VMQ 1 "s_register_operand")
4653    (match_operand:VMQ 2 "s_register_operand")
4654    (match_operand:<V_elem> 3 "s_register_operand")]
4655   "TARGET_NEON"
4656 {
4657   rtx tmp = gen_reg_rtx (<V_HALF>mode);
4658   emit_insn (gen_neon_vset_lane<V_half> (tmp, operands[3], tmp, const0_rtx));
4659   emit_insn (gen_neon_vmls_lane<mode> (operands[0], operands[1], operands[2],
4660                                        tmp, const0_rtx));
4661   DONE;
4662 })
4663
4664 (define_expand "neon_vmlsls_n<mode>"
4665   [(match_operand:<V_widen> 0 "s_register_operand")
4666    (match_operand:<V_widen> 1 "s_register_operand")
4667    (match_operand:VMDI 2 "s_register_operand")
4668    (match_operand:<V_elem> 3 "s_register_operand")]
4669   "TARGET_NEON"
4670 {
4671   rtx tmp = gen_reg_rtx (<MODE>mode);
4672   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4673   emit_insn (gen_neon_vmlsls_lane<mode> (operands[0], operands[1], operands[2],
4674                                         tmp, const0_rtx));
4675   DONE;
4676 })
4677
4678 (define_expand "neon_vmlslu_n<mode>"
4679   [(match_operand:<V_widen> 0 "s_register_operand")
4680    (match_operand:<V_widen> 1 "s_register_operand")
4681    (match_operand:VMDI 2 "s_register_operand")
4682    (match_operand:<V_elem> 3 "s_register_operand")]
4683   "TARGET_NEON"
4684 {
4685   rtx tmp = gen_reg_rtx (<MODE>mode);
4686   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4687   emit_insn (gen_neon_vmlslu_lane<mode> (operands[0], operands[1], operands[2],
4688                                         tmp, const0_rtx));
4689   DONE;
4690 })
4691
4692 (define_expand "neon_vqdmlsl_n<mode>"
4693   [(match_operand:<V_widen> 0 "s_register_operand")
4694    (match_operand:<V_widen> 1 "s_register_operand")
4695    (match_operand:VMDI 2 "s_register_operand")
4696    (match_operand:<V_elem> 3 "s_register_operand")]
4697   "TARGET_NEON"
4698 {
4699   rtx tmp = gen_reg_rtx (<MODE>mode);
4700   emit_insn (gen_neon_vset_lane<mode> (tmp, operands[3], tmp, const0_rtx));
4701   emit_insn (gen_neon_vqdmlsl_lane<mode> (operands[0], operands[1], operands[2],
4702                                           tmp, const0_rtx));
4703   DONE;
4704 })
4705
4706 (define_insn "@neon_vext<mode>"
4707   [(set (match_operand:VDQX 0 "s_register_operand" "=w")
4708         (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")
4709                       (match_operand:VDQX 2 "s_register_operand" "w")
4710                       (match_operand:SI 3 "immediate_operand" "i")]
4711                      UNSPEC_VEXT))]
4712   "TARGET_NEON"
4713 {
4714   arm_const_bounds (operands[3], 0, GET_MODE_NUNITS (<MODE>mode));
4715   return "vext.<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2, %3";
4716 }
4717   [(set_attr "type" "neon_ext<q>")]
4718 )
4719
4720 (define_insn "@neon_vrev64<mode>"
4721   [(set (match_operand:VDQ 0 "s_register_operand" "=w")
4722         (unspec:VDQ [(match_operand:VDQ 1 "s_register_operand" "w")]
4723                     UNSPEC_VREV64))]
4724   "TARGET_NEON"
4725   "vrev64.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4726   [(set_attr "type" "neon_rev<q>")]
4727 )
4728
4729 (define_insn "@neon_vrev32<mode>"
4730   [(set (match_operand:VX 0 "s_register_operand" "=w")
4731         (unspec:VX [(match_operand:VX 1 "s_register_operand" "w")]
4732                    UNSPEC_VREV32))]
4733   "TARGET_NEON"
4734   "vrev32.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4735   [(set_attr "type" "neon_rev<q>")]
4736 )
4737
4738 (define_insn "@neon_vrev16<mode>"
4739   [(set (match_operand:VE 0 "s_register_operand" "=w")
4740         (unspec:VE [(match_operand:VE 1 "s_register_operand" "w")]
4741                    UNSPEC_VREV16))]
4742   "TARGET_NEON"
4743   "vrev16.<V_sz_elem>\t%<V_reg>0, %<V_reg>1"
4744   [(set_attr "type" "neon_rev<q>")]
4745 )
4746
4747 ; vbsl_* intrinsics may compile to any of vbsl/vbif/vbit depending on register
4748 ; allocation. For an intrinsic of form:
4749 ;   rD = vbsl_* (rS, rN, rM)
4750 ; We can use any of:
4751 ;   vbsl rS, rN, rM  (if D = S)
4752 ;   vbit rD, rN, rS  (if D = M, so 1-bits in rS choose bits from rN, else rM)
4753 ;   vbif rD, rM, rS  (if D = N, so 0-bits in rS choose bits from rM, else rN)
4754
4755 (define_insn "neon_vbsl<mode>_internal"
4756   [(set (match_operand:VDQX 0 "s_register_operand"               "=w,w,w")
4757         (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" " 0,w,w")
4758                       (match_operand:VDQX 2 "s_register_operand" " w,w,0")
4759                       (match_operand:VDQX 3 "s_register_operand" " w,0,w")]
4760                      UNSPEC_VBSL))]
4761   "TARGET_NEON"
4762   "@
4763   vbsl\t%<V_reg>0, %<V_reg>2, %<V_reg>3
4764   vbit\t%<V_reg>0, %<V_reg>2, %<V_reg>1
4765   vbif\t%<V_reg>0, %<V_reg>3, %<V_reg>1"
4766   [(set_attr "type" "neon_bsl<q>")]
4767 )
4768
4769 (define_expand "neon_vbsl<mode>"
4770   [(set (match_operand:VDQX 0 "s_register_operand")
4771         (unspec:VDQX [(match_operand:<V_cmp_result> 1 "s_register_operand")
4772                       (match_operand:VDQX 2 "s_register_operand")
4773                       (match_operand:VDQX 3 "s_register_operand")]
4774                      UNSPEC_VBSL))]
4775   "TARGET_NEON"
4776 {
4777   /* We can't alias operands together if they have different modes.  */
4778   operands[1] = gen_lowpart (<MODE>mode, operands[1]);
4779 })
4780
4781 ;; vshl, vrshl
4782 (define_insn "neon_v<shift_op><sup><mode>"
4783   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4784         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4785                        (match_operand:VDQIX 2 "s_register_operand" "w")]
4786                       VSHL))]
4787   "TARGET_NEON"
4788   "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4789   [(set_attr "type" "neon_shift_imm<q>")]
4790 )
4791
4792 ;; vqshl, vqrshl
4793 (define_insn "neon_v<shift_op><sup><mode>"
4794   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4795         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4796                        (match_operand:VDQIX 2 "s_register_operand" "w")]
4797                       VQSHL))]
4798   "TARGET_NEON"
4799   "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %<V_reg>2"
4800   [(set_attr "type" "neon_sat_shift_imm<q>")]
4801 )
4802
4803 ;; vshr_n, vrshr_n
4804 (define_insn "neon_v<shift_op><sup>_n<mode>"
4805   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4806         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4807                        (match_operand:SI 2 "immediate_operand" "i")]
4808                       VSHR_N))]
4809   "TARGET_NEON"
4810 {
4811   arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) + 1);
4812   return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4813 }
4814   [(set_attr "type" "neon_shift_imm<q>")]
4815 )
4816
4817 ;; vshrn_n, vrshrn_n
4818 (define_insn "neon_v<shift_op>_n<mode>"
4819   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4820         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4821                             (match_operand:SI 2 "immediate_operand" "i")]
4822                            VSHRN_N))]
4823   "TARGET_NEON"
4824 {
4825   arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4826   return "v<shift_op>.<V_if_elem>\t%P0, %q1, %2";
4827 }
4828   [(set_attr "type" "neon_shift_imm_narrow_q")]
4829 )
4830
4831 ;; vqshrn_n, vqrshrn_n
4832 (define_insn "neon_v<shift_op><sup>_n<mode>"
4833   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4834         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4835                             (match_operand:SI 2 "immediate_operand" "i")]
4836                            VQSHRN_N))]
4837   "TARGET_NEON"
4838 {
4839   arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4840   return "v<shift_op>.<sup>%#<V_sz_elem>\t%P0, %q1, %2";
4841 }
4842   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4843 )
4844
4845 ;; vqshrun_n, vqrshrun_n
4846 (define_insn "neon_v<shift_op>_n<mode>"
4847   [(set (match_operand:<V_narrow> 0 "s_register_operand" "=w")
4848         (unspec:<V_narrow> [(match_operand:VN 1 "s_register_operand" "w")
4849                             (match_operand:SI 2 "immediate_operand" "i")]
4850                            VQSHRUN_N))]
4851   "TARGET_NEON"
4852 {
4853   arm_const_bounds (operands[2], 1, neon_element_bits (<MODE>mode) / 2 + 1);
4854   return "v<shift_op>.<V_s_elem>\t%P0, %q1, %2";
4855 }
4856   [(set_attr "type" "neon_sat_shift_imm_narrow_q")]
4857 )
4858
4859 (define_insn "neon_vshl_n<mode>"
4860   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4861         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4862                        (match_operand:SI 2 "immediate_operand" "i")]
4863                       UNSPEC_VSHL_N))]
4864   "TARGET_NEON"
4865 {
4866   arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4867   return "vshl.<V_if_elem>\t%<V_reg>0, %<V_reg>1, %2";
4868 }
4869   [(set_attr "type" "neon_shift_imm<q>")]
4870 )
4871
4872 (define_insn "neon_vqshl_<sup>_n<mode>"
4873   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4874         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4875                        (match_operand:SI 2 "immediate_operand" "i")]
4876                       VQSHL_N))]
4877   "TARGET_NEON"
4878 {
4879   arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4880   return "vqshl.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>1, %2";
4881 }
4882   [(set_attr "type" "neon_sat_shift_imm<q>")]
4883 )
4884
4885 (define_insn "neon_vqshlu_n<mode>"
4886   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4887         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "w")
4888                        (match_operand:SI 2 "immediate_operand" "i")]
4889                       UNSPEC_VQSHLU_N))]
4890   "TARGET_NEON"
4891 {
4892   arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode));
4893   return "vqshlu.<V_s_elem>\t%<V_reg>0, %<V_reg>1, %2";
4894 }
4895   [(set_attr "type" "neon_sat_shift_imm<q>")]
4896 )
4897
4898 (define_insn "neon_vshll<sup>_n<mode>"
4899   [(set (match_operand:<V_widen> 0 "s_register_operand" "=w")
4900         (unspec:<V_widen> [(match_operand:VW 1 "s_register_operand" "w")
4901                            (match_operand:SI 2 "immediate_operand" "i")]
4902                           VSHLL_N))]
4903   "TARGET_NEON"
4904 {
4905   /* The boundaries are: 0 < imm <= size.  */
4906   arm_const_bounds (operands[2], 0, neon_element_bits (<MODE>mode) + 1);
4907   return "vshll.<sup>%#<V_sz_elem>\t%q0, %P1, %2";
4908 }
4909   [(set_attr "type" "neon_shift_imm_long")]
4910 )
4911
4912 ;; vsra_n, vrsra_n
4913 (define_insn "neon_v<shift_op><sup>_n<mode>"
4914   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4915         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4916                        (match_operand:VDQIX 2 "s_register_operand" "w")
4917                        (match_operand:SI 3 "immediate_operand" "i")]
4918                       VSRA_N))]
4919   "TARGET_NEON"
4920 {
4921   arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4922   return "v<shift_op>.<sup>%#<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4923 }
4924   [(set_attr "type" "neon_shift_acc<q>")]
4925 )
4926
4927 (define_insn "neon_vsri_n<mode>"
4928   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4929         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4930                        (match_operand:VDQIX 2 "s_register_operand" "w")
4931                        (match_operand:SI 3 "immediate_operand" "i")]
4932                       UNSPEC_VSRI))]
4933   "TARGET_NEON"
4934 {
4935   arm_const_bounds (operands[3], 1, neon_element_bits (<MODE>mode) + 1);
4936   return "vsri.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4937 }
4938   [(set_attr "type" "neon_shift_reg<q>")]
4939 )
4940
4941 (define_insn "neon_vsli_n<mode>"
4942   [(set (match_operand:VDQIX 0 "s_register_operand" "=w")
4943         (unspec:VDQIX [(match_operand:VDQIX 1 "s_register_operand" "0")
4944                        (match_operand:VDQIX 2 "s_register_operand" "w")
4945                        (match_operand:SI 3 "immediate_operand" "i")]
4946                       UNSPEC_VSLI))]
4947   "TARGET_NEON"
4948 {
4949   arm_const_bounds (operands[3], 0, neon_element_bits (<MODE>mode));
4950   return "vsli.<V_sz_elem>\t%<V_reg>0, %<V_reg>2, %3";
4951 }
4952   [(set_attr "type" "neon_shift_reg<q>")]
4953 )
4954
4955 (define_insn "neon_vtbl1v8qi"
4956   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4957         (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "w")
4958                       (match_operand:V8QI 2 "s_register_operand" "w")]
4959                      UNSPEC_VTBL))]
4960   "TARGET_NEON"
4961   "vtbl.8\t%P0, {%P1}, %P2"
4962   [(set_attr "type" "neon_tbl1")]
4963 )
4964
4965 (define_insn "neon_vtbl2v8qi"
4966   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4967         (unspec:V8QI [(match_operand:TI 1 "s_register_operand" "w")
4968                       (match_operand:V8QI 2 "s_register_operand" "w")]
4969                      UNSPEC_VTBL))]
4970   "TARGET_NEON"
4971 {
4972   rtx ops[4];
4973   int tabbase = REGNO (operands[1]);
4974
4975   ops[0] = operands[0];
4976   ops[1] = gen_rtx_REG (V8QImode, tabbase);
4977   ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4978   ops[3] = operands[2];
4979   output_asm_insn ("vtbl.8\t%P0, {%P1, %P2}, %P3", ops);
4980
4981   return "";
4982 }
4983   [(set_attr "type" "neon_tbl2")]
4984 )
4985
4986 (define_insn "neon_vtbl3v8qi"
4987   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
4988         (unspec:V8QI [(match_operand:EI 1 "s_register_operand" "w")
4989                       (match_operand:V8QI 2 "s_register_operand" "w")]
4990                      UNSPEC_VTBL))]
4991   "TARGET_NEON"
4992 {
4993   rtx ops[5];
4994   int tabbase = REGNO (operands[1]);
4995
4996   ops[0] = operands[0];
4997   ops[1] = gen_rtx_REG (V8QImode, tabbase);
4998   ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
4999   ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5000   ops[4] = operands[2];
5001   output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
5002
5003   return "";
5004 }
5005   [(set_attr "type" "neon_tbl3")]
5006 )
5007
5008 (define_insn "neon_vtbl4v8qi"
5009   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5010         (unspec:V8QI [(match_operand:OI 1 "s_register_operand" "w")
5011                       (match_operand:V8QI 2 "s_register_operand" "w")]
5012                      UNSPEC_VTBL))]
5013   "TARGET_NEON"
5014 {
5015   rtx ops[6];
5016   int tabbase = REGNO (operands[1]);
5017
5018   ops[0] = operands[0];
5019   ops[1] = gen_rtx_REG (V8QImode, tabbase);
5020   ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5021   ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5022   ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
5023   ops[5] = operands[2];
5024   output_asm_insn ("vtbl.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
5025
5026   return "";
5027 }
5028   [(set_attr "type" "neon_tbl4")]
5029 )
5030
5031 ;; These three are used by the vec_perm infrastructure for V16QImode.
5032 (define_insn_and_split "neon_vtbl1v16qi"
5033   [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
5034         (unspec:V16QI [(match_operand:V16QI 1 "s_register_operand" "w")
5035                        (match_operand:V16QI 2 "s_register_operand" "w")]
5036                       UNSPEC_VTBL))]
5037   "TARGET_NEON"
5038   "#"
5039   "&& reload_completed"
5040   [(const_int 0)]
5041 {
5042   rtx op0, op1, op2, part0, part2;
5043   unsigned ofs;
5044
5045   op0 = operands[0];
5046   op1 = gen_lowpart (TImode, operands[1]);
5047   op2 = operands[2];
5048
5049   ofs = subreg_lowpart_offset (V8QImode, V16QImode);
5050   part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5051   part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5052   emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5053
5054   ofs = subreg_highpart_offset (V8QImode, V16QImode);
5055   part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5056   part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5057   emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5058   DONE;
5059 }
5060   [(set_attr "type" "multiple")]
5061 )
5062
5063 (define_insn_and_split "neon_vtbl2v16qi"
5064   [(set (match_operand:V16QI 0 "s_register_operand" "=&w")
5065         (unspec:V16QI [(match_operand:OI 1 "s_register_operand" "w")
5066                        (match_operand:V16QI 2 "s_register_operand" "w")]
5067                       UNSPEC_VTBL))]
5068   "TARGET_NEON"
5069   "#"
5070   "&& reload_completed"
5071   [(const_int 0)]
5072 {
5073   rtx op0, op1, op2, part0, part2;
5074   unsigned ofs;
5075
5076   op0 = operands[0];
5077   op1 = operands[1];
5078   op2 = operands[2];
5079
5080   ofs = subreg_lowpart_offset (V8QImode, V16QImode);
5081   part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5082   part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5083   emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5084
5085   ofs = subreg_highpart_offset (V8QImode, V16QImode);
5086   part0 = simplify_subreg (V8QImode, op0, V16QImode, ofs);
5087   part2 = simplify_subreg (V8QImode, op2, V16QImode, ofs);
5088   emit_insn (gen_neon_vtbl2v8qi (part0, op1, part2));
5089   DONE;
5090 }
5091   [(set_attr "type" "multiple")]
5092 )
5093
5094 ;; ??? Logically we should extend the regular neon_vcombine pattern to
5095 ;; handle quad-word input modes, producing octa-word output modes.  But
5096 ;; that requires us to add support for octa-word vector modes in moves.
5097 ;; That seems overkill for this one use in vec_perm.
5098 (define_insn_and_split "neon_vcombinev16qi"
5099   [(set (match_operand:OI 0 "s_register_operand" "=w")
5100         (unspec:OI [(match_operand:V16QI 1 "s_register_operand" "w")
5101                     (match_operand:V16QI 2 "s_register_operand" "w")]
5102                    UNSPEC_VCONCAT))]
5103   "TARGET_NEON"
5104   "#"
5105   "&& reload_completed"
5106   [(const_int 0)]
5107 {
5108   neon_split_vcombine (operands);
5109   DONE;
5110 }
5111 [(set_attr "type" "multiple")]
5112 )
5113
5114 (define_insn "neon_vtbx1v8qi"
5115   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5116         (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5117                       (match_operand:V8QI 2 "s_register_operand" "w")
5118                       (match_operand:V8QI 3 "s_register_operand" "w")]
5119                      UNSPEC_VTBX))]
5120   "TARGET_NEON"
5121   "vtbx.8\t%P0, {%P2}, %P3"
5122   [(set_attr "type" "neon_tbl1")]
5123 )
5124
5125 (define_insn "neon_vtbx2v8qi"
5126   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5127         (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5128                       (match_operand:TI 2 "s_register_operand" "w")
5129                       (match_operand:V8QI 3 "s_register_operand" "w")]
5130                      UNSPEC_VTBX))]
5131   "TARGET_NEON"
5132 {
5133   rtx ops[4];
5134   int tabbase = REGNO (operands[2]);
5135
5136   ops[0] = operands[0];
5137   ops[1] = gen_rtx_REG (V8QImode, tabbase);
5138   ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5139   ops[3] = operands[3];
5140   output_asm_insn ("vtbx.8\t%P0, {%P1, %P2}, %P3", ops);
5141
5142   return "";
5143 }
5144   [(set_attr "type" "neon_tbl2")]
5145 )
5146
5147 (define_insn "neon_vtbx3v8qi"
5148   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5149         (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5150                       (match_operand:EI 2 "s_register_operand" "w")
5151                       (match_operand:V8QI 3 "s_register_operand" "w")]
5152                      UNSPEC_VTBX))]
5153   "TARGET_NEON"
5154 {
5155   rtx ops[5];
5156   int tabbase = REGNO (operands[2]);
5157
5158   ops[0] = operands[0];
5159   ops[1] = gen_rtx_REG (V8QImode, tabbase);
5160   ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5161   ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5162   ops[4] = operands[3];
5163   output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3}, %P4", ops);
5164
5165   return "";
5166 }
5167   [(set_attr "type" "neon_tbl3")]
5168 )
5169
5170 (define_insn "neon_vtbx4v8qi"
5171   [(set (match_operand:V8QI 0 "s_register_operand" "=w")
5172         (unspec:V8QI [(match_operand:V8QI 1 "s_register_operand" "0")
5173                       (match_operand:OI 2 "s_register_operand" "w")
5174                       (match_operand:V8QI 3 "s_register_operand" "w")]
5175                      UNSPEC_VTBX))]
5176   "TARGET_NEON"
5177 {
5178   rtx ops[6];
5179   int tabbase = REGNO (operands[2]);
5180
5181   ops[0] = operands[0];
5182   ops[1] = gen_rtx_REG (V8QImode, tabbase);
5183   ops[2] = gen_rtx_REG (V8QImode, tabbase + 2);
5184   ops[3] = gen_rtx_REG (V8QImode, tabbase + 4);
5185   ops[4] = gen_rtx_REG (V8QImode, tabbase + 6);
5186   ops[5] = operands[3];
5187   output_asm_insn ("vtbx.8\t%P0, {%P1, %P2, %P3, %P4}, %P5", ops);
5188
5189   return "";
5190 }
5191   [(set_attr "type" "neon_tbl4")]
5192 )
5193
5194 (define_expand "@neon_vtrn<mode>_internal"
5195   [(parallel
5196     [(set (match_operand:VDQWH 0 "s_register_operand")
5197           (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5198                          (match_operand:VDQWH 2 "s_register_operand")]
5199            UNSPEC_VTRN1))
5200      (set (match_operand:VDQWH 3 "s_register_operand")
5201           (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VTRN2))])]
5202   "TARGET_NEON"
5203   ""
5204 )
5205
5206 ;; Note: Different operand numbering to handle tied registers correctly.
5207 (define_insn "*neon_vtrn<mode>_insn"
5208   [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5209         (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5210                        (match_operand:VDQWH 3 "s_register_operand" "2")]
5211          UNSPEC_VTRN1))
5212    (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5213         (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5214          UNSPEC_VTRN2))]
5215   "TARGET_NEON"
5216   "vtrn.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5217   [(set_attr "type" "neon_permute<q>")]
5218 )
5219
5220 (define_expand "@neon_vzip<mode>_internal"
5221   [(parallel
5222     [(set (match_operand:VDQWH 0 "s_register_operand")
5223           (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5224                          (match_operand:VDQWH 2 "s_register_operand")]
5225            UNSPEC_VZIP1))
5226     (set (match_operand:VDQWH 3 "s_register_operand")
5227          (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VZIP2))])]
5228   "TARGET_NEON"
5229   ""
5230 )
5231
5232 ;; Note: Different operand numbering to handle tied registers correctly.
5233 (define_insn "*neon_vzip<mode>_insn"
5234   [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5235         (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5236                        (match_operand:VDQWH 3 "s_register_operand" "2")]
5237          UNSPEC_VZIP1))
5238    (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5239         (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5240          UNSPEC_VZIP2))]
5241   "TARGET_NEON"
5242   "vzip.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5243   [(set_attr "type" "neon_zip<q>")]
5244 )
5245
5246 (define_expand "@neon_vuzp<mode>_internal"
5247   [(parallel
5248     [(set (match_operand:VDQWH 0 "s_register_operand")
5249           (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand")
5250                         (match_operand:VDQWH 2 "s_register_operand")]
5251            UNSPEC_VUZP1))
5252      (set (match_operand:VDQWH 3 "s_register_operand")
5253           (unspec:VDQWH [(match_dup 1) (match_dup 2)] UNSPEC_VUZP2))])]
5254   "TARGET_NEON"
5255   ""
5256 )
5257
5258 ;; Note: Different operand numbering to handle tied registers correctly.
5259 (define_insn "*neon_vuzp<mode>_insn"
5260   [(set (match_operand:VDQWH 0 "s_register_operand" "=&w")
5261         (unspec:VDQWH [(match_operand:VDQWH 1 "s_register_operand" "0")
5262                        (match_operand:VDQWH 3 "s_register_operand" "2")]
5263          UNSPEC_VUZP1))
5264    (set (match_operand:VDQWH 2 "s_register_operand" "=&w")
5265         (unspec:VDQWH [(match_dup 1) (match_dup 3)]
5266          UNSPEC_VUZP2))]
5267   "TARGET_NEON"
5268   "vuzp.<V_sz_elem>\t%<V_reg>0, %<V_reg>2"
5269   [(set_attr "type" "neon_zip<q>")]
5270 )
5271
5272 (define_expand "vec_load_lanes<mode><mode>"
5273   [(set (match_operand:VDQX 0 "s_register_operand")
5274         (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand")]
5275                      UNSPEC_VLD1))]
5276   "TARGET_NEON")
5277
5278 (define_insn "neon_vld1<mode>"
5279   [(set (match_operand:VDQX 0 "s_register_operand" "=w")
5280         (unspec:VDQX [(match_operand:VDQX 1 "neon_struct_operand" "Um")]
5281                     UNSPEC_VLD1))]
5282   "TARGET_NEON"
5283   "vld1.<V_sz_elem>\t%h0, %A1"
5284   [(set_attr "type" "neon_load1_1reg<q>")]
5285 )
5286
5287 ;; The lane numbers in the RTL are in GCC lane order, having been flipped
5288 ;; in arm_expand_neon_args. The lane numbers are restored to architectural
5289 ;; lane order here.
5290 (define_insn "neon_vld1_lane<mode>"
5291   [(set (match_operand:VDX 0 "s_register_operand" "=w")
5292         (unspec:VDX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5293                      (match_operand:VDX 2 "s_register_operand" "0")
5294                      (match_operand:SI 3 "immediate_operand" "i")]
5295                     UNSPEC_VLD1_LANE))]
5296   "TARGET_NEON"
5297 {
5298   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5299   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5300   operands[3] = GEN_INT (lane);
5301   if (max == 1)
5302     return "vld1.<V_sz_elem>\t%P0, %A1";
5303   else
5304     return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5305 }
5306   [(set_attr "type" "neon_load1_one_lane<q>")]
5307 )
5308
5309 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5310 ;; here on big endian targets.
5311 (define_insn "neon_vld1_lane<mode>"
5312   [(set (match_operand:VQX 0 "s_register_operand" "=w")
5313         (unspec:VQX [(match_operand:<V_elem> 1 "neon_struct_operand" "Um")
5314                      (match_operand:VQX 2 "s_register_operand" "0")
5315                      (match_operand:SI 3 "immediate_operand" "i")]
5316                     UNSPEC_VLD1_LANE))]
5317   "TARGET_NEON"
5318 {
5319   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5320   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5321   operands[3] = GEN_INT (lane);
5322   int regno = REGNO (operands[0]);
5323   if (lane >= max / 2)
5324     {
5325       lane -= max / 2;
5326       regno += 2;
5327       operands[3] = GEN_INT (lane);
5328     }
5329   operands[0] = gen_rtx_REG (<V_HALF>mode, regno);
5330   if (max == 2)
5331     return "vld1.<V_sz_elem>\t%P0, %A1";
5332   else
5333     return "vld1.<V_sz_elem>\t{%P0[%c3]}, %A1";
5334 }
5335   [(set_attr "type" "neon_load1_one_lane<q>")]
5336 )
5337
5338 (define_insn "neon_vld1_dup<mode>"
5339   [(set (match_operand:VD_LANE 0 "s_register_operand" "=w")
5340         (vec_duplicate:VD_LANE (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5341   "TARGET_NEON"
5342   "vld1.<V_sz_elem>\t{%P0[]}, %A1"
5343   [(set_attr "type" "neon_load1_all_lanes<q>")]
5344 )
5345
5346 ;; Special case for DImode.  Treat it exactly like a simple load.
5347 (define_expand "neon_vld1_dupdi"
5348   [(set (match_operand:DI 0 "s_register_operand")
5349         (unspec:DI [(match_operand:DI 1 "neon_struct_operand")]
5350                    UNSPEC_VLD1))]
5351   "TARGET_NEON"
5352   ""
5353 )
5354
5355 (define_insn "neon_vld1_dup<mode>"
5356   [(set (match_operand:VQ2 0 "s_register_operand" "=w")
5357         (vec_duplicate:VQ2 (match_operand:<V_elem> 1 "neon_struct_operand" "Um")))]
5358   "TARGET_NEON"
5359 {
5360   return "vld1.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5361 }
5362   [(set_attr "type" "neon_load1_all_lanes<q>")]
5363 )
5364
5365 (define_insn_and_split "neon_vld1_dupv2di"
5366    [(set (match_operand:V2DI 0 "s_register_operand" "=w")
5367     (vec_duplicate:V2DI (match_operand:DI 1 "neon_struct_operand" "Um")))]
5368    "TARGET_NEON"
5369    "#"
5370    "&& reload_completed"
5371    [(const_int 0)]
5372    {
5373     rtx tmprtx = gen_lowpart (DImode, operands[0]);
5374     emit_insn (gen_neon_vld1_dupdi (tmprtx, operands[1]));
5375     emit_move_insn (gen_highpart (DImode, operands[0]), tmprtx );
5376     DONE;
5377     }
5378   [(set_attr "length" "8")
5379    (set_attr "type" "neon_load1_all_lanes_q")]
5380 )
5381
5382 (define_expand "vec_store_lanes<mode><mode>"
5383   [(set (match_operand:VDQX 0 "neon_struct_operand")
5384         (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand")]
5385                      UNSPEC_VST1))]
5386   "TARGET_NEON")
5387
5388 (define_insn "neon_vst1<mode>"
5389   [(set (match_operand:VDQX 0 "neon_struct_operand" "=Um")
5390         (unspec:VDQX [(match_operand:VDQX 1 "s_register_operand" "w")]
5391                      UNSPEC_VST1))]
5392   "TARGET_NEON"
5393   "vst1.<V_sz_elem>\t%h1, %A0"
5394   [(set_attr "type" "neon_store1_1reg<q>")])
5395
5396 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5397 ;; here on big endian targets.
5398 (define_insn "neon_vst1_lane<mode>"
5399   [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5400         (unspec:<V_elem>
5401           [(match_operand:VDX 1 "s_register_operand" "w")
5402            (match_operand:SI 2 "immediate_operand" "i")]
5403           UNSPEC_VST1_LANE))]
5404   "TARGET_NEON"
5405 {
5406   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5407   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5408   operands[2] = GEN_INT (lane);
5409   if (max == 1)
5410     return "vst1.<V_sz_elem>\t{%P1}, %A0";
5411   else
5412     return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5413 }
5414   [(set_attr "type" "neon_store1_one_lane<q>")]
5415 )
5416
5417 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5418 ;; here on big endian targets.
5419 (define_insn "neon_vst1_lane<mode>"
5420   [(set (match_operand:<V_elem> 0 "neon_struct_operand" "=Um")
5421         (unspec:<V_elem>
5422           [(match_operand:VQX 1 "s_register_operand" "w")
5423            (match_operand:SI 2 "immediate_operand" "i")]
5424           UNSPEC_VST1_LANE))]
5425   "TARGET_NEON"
5426 {
5427   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5428   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5429   int regno = REGNO (operands[1]);
5430   if (lane >= max / 2)
5431     {
5432       lane -= max / 2;
5433       regno += 2;
5434     }
5435   operands[2] = GEN_INT (lane);
5436   operands[1] = gen_rtx_REG (<V_HALF>mode, regno);
5437   if (max == 2)
5438     return "vst1.<V_sz_elem>\t{%P1}, %A0";
5439   else
5440     return "vst1.<V_sz_elem>\t{%P1[%c2]}, %A0";
5441 }
5442   [(set_attr "type" "neon_store1_one_lane<q>")]
5443 )
5444
5445 (define_expand "vec_load_lanesti<mode>"
5446   [(set (match_operand:TI 0 "s_register_operand")
5447         (unspec:TI [(match_operand:TI 1 "neon_struct_operand")
5448                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5449                    UNSPEC_VLD2))]
5450   "TARGET_NEON")
5451
5452 (define_insn "neon_vld2<mode>"
5453   [(set (match_operand:TI 0 "s_register_operand" "=w")
5454         (unspec:TI [(match_operand:TI 1 "neon_struct_operand" "Um")
5455                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5456                    UNSPEC_VLD2))]
5457   "TARGET_NEON"
5458 {
5459   if (<V_sz_elem> == 64)
5460     return "vld1.64\t%h0, %A1";
5461   else
5462     return "vld2.<V_sz_elem>\t%h0, %A1";
5463 }
5464   [(set (attr "type")
5465       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5466                     (const_string "neon_load1_2reg<q>")
5467                     (const_string "neon_load2_2reg<q>")))]
5468 )
5469
5470 (define_expand "vec_load_lanesoi<mode>"
5471   [(set (match_operand:OI 0 "s_register_operand")
5472         (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5473                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5474                    UNSPEC_VLD2))]
5475   "TARGET_NEON")
5476
5477 (define_insn "neon_vld2<mode>"
5478   [(set (match_operand:OI 0 "s_register_operand" "=w")
5479         (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5480                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5481                    UNSPEC_VLD2))]
5482   "TARGET_NEON"
5483   "vld2.<V_sz_elem>\t%h0, %A1"
5484   [(set_attr "type" "neon_load2_2reg_q")])
5485
5486 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5487 ;; here on big endian targets.
5488 (define_insn "neon_vld2_lane<mode>"
5489   [(set (match_operand:TI 0 "s_register_operand" "=w")
5490         (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5491                     (match_operand:TI 2 "s_register_operand" "0")
5492                     (match_operand:SI 3 "immediate_operand" "i")
5493                     (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5494                    UNSPEC_VLD2_LANE))]
5495   "TARGET_NEON"
5496 {
5497   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5498   int regno = REGNO (operands[0]);
5499   rtx ops[4];
5500   ops[0] = gen_rtx_REG (DImode, regno);
5501   ops[1] = gen_rtx_REG (DImode, regno + 2);
5502   ops[2] = operands[1];
5503   ops[3] = GEN_INT (lane);
5504   output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5505   return "";
5506 }
5507   [(set_attr "type" "neon_load2_one_lane<q>")]
5508 )
5509
5510 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5511 ;; here on big endian targets.
5512 (define_insn "neon_vld2_lane<mode>"
5513   [(set (match_operand:OI 0 "s_register_operand" "=w")
5514         (unspec:OI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5515                     (match_operand:OI 2 "s_register_operand" "0")
5516                     (match_operand:SI 3 "immediate_operand" "i")
5517                     (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5518                    UNSPEC_VLD2_LANE))]
5519   "TARGET_NEON"
5520 {
5521   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5522   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5523   int regno = REGNO (operands[0]);
5524   rtx ops[4];
5525   if (lane >= max / 2)
5526     {
5527       lane -= max / 2;
5528       regno += 2;
5529     }
5530   ops[0] = gen_rtx_REG (DImode, regno);
5531   ops[1] = gen_rtx_REG (DImode, regno + 4);
5532   ops[2] = operands[1];
5533   ops[3] = GEN_INT (lane);
5534   output_asm_insn ("vld2.<V_sz_elem>\t{%P0[%c3], %P1[%c3]}, %A2", ops);
5535   return "";
5536 }
5537   [(set_attr "type" "neon_load2_one_lane<q>")]
5538 )
5539
5540 (define_insn "neon_vld2_dup<mode>"
5541   [(set (match_operand:TI 0 "s_register_operand" "=w")
5542         (unspec:TI [(match_operand:<V_two_elem> 1 "neon_struct_operand" "Um")
5543                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5544                    UNSPEC_VLD2_DUP))]
5545   "TARGET_NEON"
5546 {
5547   if (GET_MODE_NUNITS (<MODE>mode) > 1)
5548     return "vld2.<V_sz_elem>\t{%e0[], %f0[]}, %A1";
5549   else
5550     return "vld1.<V_sz_elem>\t%h0, %A1";
5551 }
5552   [(set (attr "type")
5553       (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5554                     (const_string "neon_load2_all_lanes<q>")
5555                     (const_string "neon_load1_1reg<q>")))]
5556 )
5557
5558 (define_expand "vec_store_lanesti<mode>"
5559   [(set (match_operand:TI 0 "neon_struct_operand")
5560         (unspec:TI [(match_operand:TI 1 "s_register_operand")
5561                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5562                    UNSPEC_VST2))]
5563   "TARGET_NEON")
5564
5565 (define_insn "neon_vst2<mode>"
5566   [(set (match_operand:TI 0 "neon_struct_operand" "=Um")
5567         (unspec:TI [(match_operand:TI 1 "s_register_operand" "w")
5568                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5569                    UNSPEC_VST2))]
5570   "TARGET_NEON"
5571 {
5572   if (<V_sz_elem> == 64)
5573     return "vst1.64\t%h1, %A0";
5574   else
5575     return "vst2.<V_sz_elem>\t%h1, %A0";
5576 }
5577   [(set (attr "type")
5578       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5579                     (const_string "neon_store1_2reg<q>")
5580                     (const_string "neon_store2_one_lane<q>")))]
5581 )
5582
5583 (define_expand "vec_store_lanesoi<mode>"
5584   [(set (match_operand:OI 0 "neon_struct_operand")
5585         (unspec:OI [(match_operand:OI 1 "s_register_operand")
5586                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5587                    UNSPEC_VST2))]
5588   "TARGET_NEON")
5589
5590 (define_insn "neon_vst2<mode>"
5591   [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
5592         (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
5593                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5594                    UNSPEC_VST2))]
5595   "TARGET_NEON"
5596   "vst2.<V_sz_elem>\t%h1, %A0"
5597   [(set_attr "type" "neon_store2_4reg<q>")]
5598 )
5599
5600 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5601 ;; here on big endian targets.
5602 (define_insn "neon_vst2_lane<mode>"
5603   [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5604         (unspec:<V_two_elem>
5605           [(match_operand:TI 1 "s_register_operand" "w")
5606            (match_operand:SI 2 "immediate_operand" "i")
5607            (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5608           UNSPEC_VST2_LANE))]
5609   "TARGET_NEON"
5610 {
5611   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5612   int regno = REGNO (operands[1]);
5613   rtx ops[4];
5614   ops[0] = operands[0];
5615   ops[1] = gen_rtx_REG (DImode, regno);
5616   ops[2] = gen_rtx_REG (DImode, regno + 2);
5617   ops[3] = GEN_INT (lane);
5618   output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5619   return "";
5620 }
5621   [(set_attr "type" "neon_store2_one_lane<q>")]
5622 )
5623
5624 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5625 ;; here on big endian targets.
5626 (define_insn "neon_vst2_lane<mode>"
5627   [(set (match_operand:<V_two_elem> 0 "neon_struct_operand" "=Um")
5628         (unspec:<V_two_elem>
5629            [(match_operand:OI 1 "s_register_operand" "w")
5630             (match_operand:SI 2 "immediate_operand" "i")
5631             (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5632            UNSPEC_VST2_LANE))]
5633   "TARGET_NEON"
5634 {
5635   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5636   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5637   int regno = REGNO (operands[1]);
5638   rtx ops[4];
5639   if (lane >= max / 2)
5640     {
5641       lane -= max / 2;
5642       regno += 2;
5643     }
5644   ops[0] = operands[0];
5645   ops[1] = gen_rtx_REG (DImode, regno);
5646   ops[2] = gen_rtx_REG (DImode, regno + 4);
5647   ops[3] = GEN_INT (lane);
5648   output_asm_insn ("vst2.<V_sz_elem>\t{%P1[%c3], %P2[%c3]}, %A0", ops);
5649   return "";
5650 }
5651   [(set_attr "type" "neon_store2_one_lane<q>")]
5652 )
5653
5654 (define_expand "vec_load_lanesei<mode>"
5655   [(set (match_operand:EI 0 "s_register_operand")
5656         (unspec:EI [(match_operand:EI 1 "neon_struct_operand")
5657                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5658                    UNSPEC_VLD3))]
5659   "TARGET_NEON")
5660
5661 (define_insn "neon_vld3<mode>"
5662   [(set (match_operand:EI 0 "s_register_operand" "=w")
5663         (unspec:EI [(match_operand:EI 1 "neon_struct_operand" "Um")
5664                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5665                    UNSPEC_VLD3))]
5666   "TARGET_NEON"
5667 {
5668   if (<V_sz_elem> == 64)
5669     return "vld1.64\t%h0, %A1";
5670   else
5671     return "vld3.<V_sz_elem>\t%h0, %A1";
5672 }
5673   [(set (attr "type")
5674       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5675                     (const_string "neon_load1_3reg<q>")
5676                     (const_string "neon_load3_3reg<q>")))]
5677 )
5678
5679 (define_expand "vec_load_lanesci<mode>"
5680   [(match_operand:CI 0 "s_register_operand")
5681    (match_operand:CI 1 "neon_struct_operand")
5682    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5683   "TARGET_NEON"
5684 {
5685   emit_insn (gen_neon_vld3<mode> (operands[0], operands[1]));
5686   DONE;
5687 })
5688
5689 (define_expand "neon_vld3<mode>"
5690   [(match_operand:CI 0 "s_register_operand")
5691    (match_operand:CI 1 "neon_struct_operand")
5692    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5693   "TARGET_NEON"
5694 {
5695   rtx mem;
5696
5697   mem = adjust_address (operands[1], EImode, 0);
5698   emit_insn (gen_neon_vld3qa<mode> (operands[0], mem));
5699   mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5700   emit_insn (gen_neon_vld3qb<mode> (operands[0], mem, operands[0]));
5701   DONE;
5702 })
5703
5704 (define_insn "neon_vld3qa<mode>"
5705   [(set (match_operand:CI 0 "s_register_operand" "=w")
5706         (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5707                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5708                    UNSPEC_VLD3A))]
5709   "TARGET_NEON"
5710 {
5711   int regno = REGNO (operands[0]);
5712   rtx ops[4];
5713   ops[0] = gen_rtx_REG (DImode, regno);
5714   ops[1] = gen_rtx_REG (DImode, regno + 4);
5715   ops[2] = gen_rtx_REG (DImode, regno + 8);
5716   ops[3] = operands[1];
5717   output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5718   return "";
5719 }
5720   [(set_attr "type" "neon_load3_3reg<q>")]
5721 )
5722
5723 (define_insn "neon_vld3qb<mode>"
5724   [(set (match_operand:CI 0 "s_register_operand" "=w")
5725         (unspec:CI [(match_operand:EI 1 "neon_struct_operand" "Um")
5726                     (match_operand:CI 2 "s_register_operand" "0")
5727                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5728                    UNSPEC_VLD3B))]
5729   "TARGET_NEON"
5730 {
5731   int regno = REGNO (operands[0]);
5732   rtx ops[4];
5733   ops[0] = gen_rtx_REG (DImode, regno + 2);
5734   ops[1] = gen_rtx_REG (DImode, regno + 6);
5735   ops[2] = gen_rtx_REG (DImode, regno + 10);
5736   ops[3] = operands[1];
5737   output_asm_insn ("vld3.<V_sz_elem>\t{%P0, %P1, %P2}, %A3", ops);
5738   return "";
5739 }
5740   [(set_attr "type" "neon_load3_3reg<q>")]
5741 )
5742
5743 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5744 ;; here on big endian targets.
5745 (define_insn "neon_vld3_lane<mode>"
5746   [(set (match_operand:EI 0 "s_register_operand" "=w")
5747         (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5748                     (match_operand:EI 2 "s_register_operand" "0")
5749                     (match_operand:SI 3 "immediate_operand" "i")
5750                     (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5751                    UNSPEC_VLD3_LANE))]
5752   "TARGET_NEON"
5753 {
5754   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N (<MODE>mode, INTVAL (operands[3]));
5755   int regno = REGNO (operands[0]);
5756   rtx ops[5];
5757   ops[0] = gen_rtx_REG (DImode, regno);
5758   ops[1] = gen_rtx_REG (DImode, regno + 2);
5759   ops[2] = gen_rtx_REG (DImode, regno + 4);
5760   ops[3] = operands[1];
5761   ops[4] = GEN_INT (lane);
5762   output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5763                    ops);
5764   return "";
5765 }
5766   [(set_attr "type" "neon_load3_one_lane<q>")]
5767 )
5768
5769 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5770 ;; here on big endian targets.
5771 (define_insn "neon_vld3_lane<mode>"
5772   [(set (match_operand:CI 0 "s_register_operand" "=w")
5773         (unspec:CI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5774                     (match_operand:CI 2 "s_register_operand" "0")
5775                     (match_operand:SI 3 "immediate_operand" "i")
5776                     (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5777                    UNSPEC_VLD3_LANE))]
5778   "TARGET_NEON"
5779 {
5780   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
5781   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5782   int regno = REGNO (operands[0]);
5783   rtx ops[5];
5784   if (lane >= max / 2)
5785     {
5786       lane -= max / 2;
5787       regno += 2;
5788     }
5789   ops[0] = gen_rtx_REG (DImode, regno);
5790   ops[1] = gen_rtx_REG (DImode, regno + 4);
5791   ops[2] = gen_rtx_REG (DImode, regno + 8);
5792   ops[3] = operands[1];
5793   ops[4] = GEN_INT (lane);
5794   output_asm_insn ("vld3.<V_sz_elem>\t{%P0[%c4], %P1[%c4], %P2[%c4]}, %3",
5795                    ops);
5796   return "";
5797 }
5798   [(set_attr "type" "neon_load3_one_lane<q>")]
5799 )
5800
5801 (define_insn "neon_vld3_dup<mode>"
5802   [(set (match_operand:EI 0 "s_register_operand" "=w")
5803         (unspec:EI [(match_operand:<V_three_elem> 1 "neon_struct_operand" "Um")
5804                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5805                    UNSPEC_VLD3_DUP))]
5806   "TARGET_NEON"
5807 {
5808   if (GET_MODE_NUNITS (<MODE>mode) > 1)
5809     {
5810       int regno = REGNO (operands[0]);
5811       rtx ops[4];
5812       ops[0] = gen_rtx_REG (DImode, regno);
5813       ops[1] = gen_rtx_REG (DImode, regno + 2);
5814       ops[2] = gen_rtx_REG (DImode, regno + 4);
5815       ops[3] = operands[1];
5816       output_asm_insn ("vld3.<V_sz_elem>\t{%P0[], %P1[], %P2[]}, %3", ops);
5817       return "";
5818     }
5819   else
5820     return "vld1.<V_sz_elem>\t%h0, %A1";
5821 }
5822   [(set (attr "type")
5823       (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
5824                     (const_string "neon_load3_all_lanes<q>")
5825                     (const_string "neon_load1_1reg<q>")))])
5826
5827 (define_expand "vec_store_lanesei<mode>"
5828   [(set (match_operand:EI 0 "neon_struct_operand")
5829         (unspec:EI [(match_operand:EI 1 "s_register_operand")
5830                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5831                    UNSPEC_VST3))]
5832   "TARGET_NEON")
5833
5834 (define_insn "neon_vst3<mode>"
5835   [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5836         (unspec:EI [(match_operand:EI 1 "s_register_operand" "w")
5837                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5838                    UNSPEC_VST3))]
5839   "TARGET_NEON"
5840 {
5841   if (<V_sz_elem> == 64)
5842     return "vst1.64\t%h1, %A0";
5843   else
5844     return "vst3.<V_sz_elem>\t%h1, %A0";
5845 }
5846   [(set (attr "type")
5847       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5848                     (const_string "neon_store1_3reg<q>")
5849                     (const_string "neon_store3_one_lane<q>")))])
5850
5851 (define_expand "vec_store_lanesci<mode>"
5852   [(match_operand:CI 0 "neon_struct_operand")
5853    (match_operand:CI 1 "s_register_operand")
5854    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5855   "TARGET_NEON"
5856 {
5857   emit_insn (gen_neon_vst3<mode> (operands[0], operands[1]));
5858   DONE;
5859 })
5860
5861 (define_expand "neon_vst3<mode>"
5862   [(match_operand:CI 0 "neon_struct_operand")
5863    (match_operand:CI 1 "s_register_operand")
5864    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5865   "TARGET_NEON"
5866 {
5867   rtx mem;
5868
5869   mem = adjust_address (operands[0], EImode, 0);
5870   emit_insn (gen_neon_vst3qa<mode> (mem, operands[1]));
5871   mem = adjust_address (mem, EImode, GET_MODE_SIZE (EImode));
5872   emit_insn (gen_neon_vst3qb<mode> (mem, operands[1]));
5873   DONE;
5874 })
5875
5876 (define_insn "neon_vst3qa<mode>"
5877   [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5878         (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5879                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5880                    UNSPEC_VST3A))]
5881   "TARGET_NEON"
5882 {
5883   int regno = REGNO (operands[1]);
5884   rtx ops[4];
5885   ops[0] = operands[0];
5886   ops[1] = gen_rtx_REG (DImode, regno);
5887   ops[2] = gen_rtx_REG (DImode, regno + 4);
5888   ops[3] = gen_rtx_REG (DImode, regno + 8);
5889   output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5890   return "";
5891 }
5892   [(set_attr "type" "neon_store3_3reg<q>")]
5893 )
5894
5895 (define_insn "neon_vst3qb<mode>"
5896   [(set (match_operand:EI 0 "neon_struct_operand" "=Um")
5897         (unspec:EI [(match_operand:CI 1 "s_register_operand" "w")
5898                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5899                    UNSPEC_VST3B))]
5900   "TARGET_NEON"
5901 {
5902   int regno = REGNO (operands[1]);
5903   rtx ops[4];
5904   ops[0] = operands[0];
5905   ops[1] = gen_rtx_REG (DImode, regno + 2);
5906   ops[2] = gen_rtx_REG (DImode, regno + 6);
5907   ops[3] = gen_rtx_REG (DImode, regno + 10);
5908   output_asm_insn ("vst3.<V_sz_elem>\t{%P1, %P2, %P3}, %A0", ops);
5909   return "";
5910 }
5911   [(set_attr "type" "neon_store3_3reg<q>")]
5912 )
5913
5914 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5915 ;; here on big endian targets.
5916 (define_insn "neon_vst3_lane<mode>"
5917   [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5918         (unspec:<V_three_elem>
5919            [(match_operand:EI 1 "s_register_operand" "w")
5920             (match_operand:SI 2 "immediate_operand" "i")
5921             (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5922            UNSPEC_VST3_LANE))]
5923   "TARGET_NEON"
5924 {
5925   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5926   int regno = REGNO (operands[1]);
5927   rtx ops[5];
5928   ops[0] = operands[0];
5929   ops[1] = gen_rtx_REG (DImode, regno);
5930   ops[2] = gen_rtx_REG (DImode, regno + 2);
5931   ops[3] = gen_rtx_REG (DImode, regno + 4);
5932   ops[4] = GEN_INT (lane);
5933   output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5934                    ops);
5935   return "";
5936 }
5937   [(set_attr "type" "neon_store3_one_lane<q>")]
5938 )
5939
5940 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
5941 ;; here on big endian targets.
5942 (define_insn "neon_vst3_lane<mode>"
5943   [(set (match_operand:<V_three_elem> 0 "neon_struct_operand" "=Um")
5944         (unspec:<V_three_elem>
5945            [(match_operand:CI 1 "s_register_operand" "w")
5946             (match_operand:SI 2 "immediate_operand" "i")
5947             (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5948            UNSPEC_VST3_LANE))]
5949   "TARGET_NEON"
5950 {
5951   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
5952   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
5953   int regno = REGNO (operands[1]);
5954   rtx ops[5];
5955   if (lane >= max / 2)
5956     {
5957       lane -= max / 2;
5958       regno += 2;
5959     }
5960   ops[0] = operands[0];
5961   ops[1] = gen_rtx_REG (DImode, regno);
5962   ops[2] = gen_rtx_REG (DImode, regno + 4);
5963   ops[3] = gen_rtx_REG (DImode, regno + 8);
5964   ops[4] = GEN_INT (lane);
5965   output_asm_insn ("vst3.<V_sz_elem>\t{%P1[%c4], %P2[%c4], %P3[%c4]}, %0",
5966                    ops);
5967   return "";
5968 }
5969   [(set_attr "type" "neon_store3_one_lane<q>")]
5970 )
5971
5972 (define_expand "vec_load_lanesoi<mode>"
5973   [(set (match_operand:OI 0 "s_register_operand")
5974         (unspec:OI [(match_operand:OI 1 "neon_struct_operand")
5975                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5976                    UNSPEC_VLD4))]
5977   "TARGET_NEON")
5978
5979 (define_insn "neon_vld4<mode>"
5980   [(set (match_operand:OI 0 "s_register_operand" "=w")
5981         (unspec:OI [(match_operand:OI 1 "neon_struct_operand" "Um")
5982                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
5983                    UNSPEC_VLD4))]
5984   "TARGET_NEON"
5985 {
5986   if (<V_sz_elem> == 64)
5987     return "vld1.64\t%h0, %A1";
5988   else
5989     return "vld4.<V_sz_elem>\t%h0, %A1";
5990 }
5991   [(set (attr "type")
5992       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
5993                     (const_string "neon_load1_4reg<q>")
5994                     (const_string "neon_load4_4reg<q>")))]
5995 )
5996
5997 (define_expand "vec_load_lanesxi<mode>"
5998   [(match_operand:XI 0 "s_register_operand")
5999    (match_operand:XI 1 "neon_struct_operand")
6000    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6001   "TARGET_NEON"
6002 {
6003   emit_insn (gen_neon_vld4<mode> (operands[0], operands[1]));
6004   DONE;
6005 })
6006
6007 (define_expand "neon_vld4<mode>"
6008   [(match_operand:XI 0 "s_register_operand")
6009    (match_operand:XI 1 "neon_struct_operand")
6010    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6011   "TARGET_NEON"
6012 {
6013   rtx mem;
6014
6015   mem = adjust_address (operands[1], OImode, 0);
6016   emit_insn (gen_neon_vld4qa<mode> (operands[0], mem));
6017   mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6018   emit_insn (gen_neon_vld4qb<mode> (operands[0], mem, operands[0]));
6019   DONE;
6020 })
6021
6022 (define_insn "neon_vld4qa<mode>"
6023   [(set (match_operand:XI 0 "s_register_operand" "=w")
6024         (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6025                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6026                    UNSPEC_VLD4A))]
6027   "TARGET_NEON"
6028 {
6029   int regno = REGNO (operands[0]);
6030   rtx ops[5];
6031   ops[0] = gen_rtx_REG (DImode, regno);
6032   ops[1] = gen_rtx_REG (DImode, regno + 4);
6033   ops[2] = gen_rtx_REG (DImode, regno + 8);
6034   ops[3] = gen_rtx_REG (DImode, regno + 12);
6035   ops[4] = operands[1];
6036   output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
6037   return "";
6038 }
6039   [(set_attr "type" "neon_load4_4reg<q>")]
6040 )
6041
6042 (define_insn "neon_vld4qb<mode>"
6043   [(set (match_operand:XI 0 "s_register_operand" "=w")
6044         (unspec:XI [(match_operand:OI 1 "neon_struct_operand" "Um")
6045                     (match_operand:XI 2 "s_register_operand" "0")
6046                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6047                    UNSPEC_VLD4B))]
6048   "TARGET_NEON"
6049 {
6050   int regno = REGNO (operands[0]);
6051   rtx ops[5];
6052   ops[0] = gen_rtx_REG (DImode, regno + 2);
6053   ops[1] = gen_rtx_REG (DImode, regno + 6);
6054   ops[2] = gen_rtx_REG (DImode, regno + 10);
6055   ops[3] = gen_rtx_REG (DImode, regno + 14);
6056   ops[4] = operands[1];
6057   output_asm_insn ("vld4.<V_sz_elem>\t{%P0, %P1, %P2, %P3}, %A4", ops);
6058   return "";
6059 }
6060   [(set_attr "type" "neon_load4_4reg<q>")]
6061 )
6062
6063 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6064 ;; here on big endian targets.
6065 (define_insn "neon_vld4_lane<mode>"
6066   [(set (match_operand:OI 0 "s_register_operand" "=w")
6067         (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6068                     (match_operand:OI 2 "s_register_operand" "0")
6069                     (match_operand:SI 3 "immediate_operand" "i")
6070                     (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6071                    UNSPEC_VLD4_LANE))]
6072   "TARGET_NEON"
6073 {
6074   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6075   int regno = REGNO (operands[0]);
6076   rtx ops[6];
6077   ops[0] = gen_rtx_REG (DImode, regno);
6078   ops[1] = gen_rtx_REG (DImode, regno + 2);
6079   ops[2] = gen_rtx_REG (DImode, regno + 4);
6080   ops[3] = gen_rtx_REG (DImode, regno + 6);
6081   ops[4] = operands[1];
6082   ops[5] = GEN_INT (lane);
6083   output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
6084                    ops);
6085   return "";
6086 }
6087   [(set_attr "type" "neon_load4_one_lane<q>")]
6088 )
6089
6090 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6091 ;; here on big endian targets.
6092 (define_insn "neon_vld4_lane<mode>"
6093   [(set (match_operand:XI 0 "s_register_operand" "=w")
6094         (unspec:XI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6095                     (match_operand:XI 2 "s_register_operand" "0")
6096                     (match_operand:SI 3 "immediate_operand" "i")
6097                     (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6098                    UNSPEC_VLD4_LANE))]
6099   "TARGET_NEON"
6100 {
6101   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[3]));
6102   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6103   int regno = REGNO (operands[0]);
6104   rtx ops[6];
6105   if (lane >= max / 2)
6106     {
6107       lane -= max / 2;
6108       regno += 2;
6109     }
6110   ops[0] = gen_rtx_REG (DImode, regno);
6111   ops[1] = gen_rtx_REG (DImode, regno + 4);
6112   ops[2] = gen_rtx_REG (DImode, regno + 8);
6113   ops[3] = gen_rtx_REG (DImode, regno + 12);
6114   ops[4] = operands[1];
6115   ops[5] = GEN_INT (lane);
6116   output_asm_insn ("vld4.<V_sz_elem>\t{%P0[%c5], %P1[%c5], %P2[%c5], %P3[%c5]}, %A4",
6117                    ops);
6118   return "";
6119 }
6120   [(set_attr "type" "neon_load4_one_lane<q>")]
6121 )
6122
6123 (define_insn "neon_vld4_dup<mode>"
6124   [(set (match_operand:OI 0 "s_register_operand" "=w")
6125         (unspec:OI [(match_operand:<V_four_elem> 1 "neon_struct_operand" "Um")
6126                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6127                    UNSPEC_VLD4_DUP))]
6128   "TARGET_NEON"
6129 {
6130   if (GET_MODE_NUNITS (<MODE>mode) > 1)
6131     {
6132       int regno = REGNO (operands[0]);
6133       rtx ops[5];
6134       ops[0] = gen_rtx_REG (DImode, regno);
6135       ops[1] = gen_rtx_REG (DImode, regno + 2);
6136       ops[2] = gen_rtx_REG (DImode, regno + 4);
6137       ops[3] = gen_rtx_REG (DImode, regno + 6);
6138       ops[4] = operands[1];
6139       output_asm_insn ("vld4.<V_sz_elem>\t{%P0[], %P1[], %P2[], %P3[]}, %A4",
6140                        ops);
6141       return "";
6142     }
6143   else
6144     return "vld1.<V_sz_elem>\t%h0, %A1";
6145 }
6146   [(set (attr "type")
6147       (if_then_else (gt (const_string "<V_mode_nunits>") (const_string "1"))
6148                     (const_string "neon_load4_all_lanes<q>")
6149                     (const_string "neon_load1_1reg<q>")))]
6150 )
6151
6152 (define_expand "vec_store_lanesoi<mode>"
6153   [(set (match_operand:OI 0 "neon_struct_operand")
6154         (unspec:OI [(match_operand:OI 1 "s_register_operand")
6155                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6156                    UNSPEC_VST4))]
6157   "TARGET_NEON")
6158
6159 (define_insn "neon_vst4<mode>"
6160   [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6161         (unspec:OI [(match_operand:OI 1 "s_register_operand" "w")
6162                     (unspec:VDX [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6163                    UNSPEC_VST4))]
6164   "TARGET_NEON"
6165 {
6166   if (<V_sz_elem> == 64)
6167     return "vst1.64\t%h1, %A0";
6168   else
6169     return "vst4.<V_sz_elem>\t%h1, %A0";
6170 }
6171   [(set (attr "type")
6172       (if_then_else (eq (const_string "<V_sz_elem>") (const_string "64"))
6173                     (const_string "neon_store1_4reg<q>")
6174                     (const_string "neon_store4_4reg<q>")))]
6175 )
6176
6177 (define_expand "vec_store_lanesxi<mode>"
6178   [(match_operand:XI 0 "neon_struct_operand")
6179    (match_operand:XI 1 "s_register_operand")
6180    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6181   "TARGET_NEON"
6182 {
6183   emit_insn (gen_neon_vst4<mode> (operands[0], operands[1]));
6184   DONE;
6185 })
6186
6187 (define_expand "neon_vst4<mode>"
6188   [(match_operand:XI 0 "neon_struct_operand")
6189    (match_operand:XI 1 "s_register_operand")
6190    (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6191   "TARGET_NEON"
6192 {
6193   rtx mem;
6194
6195   mem = adjust_address (operands[0], OImode, 0);
6196   emit_insn (gen_neon_vst4qa<mode> (mem, operands[1]));
6197   mem = adjust_address (mem, OImode, GET_MODE_SIZE (OImode));
6198   emit_insn (gen_neon_vst4qb<mode> (mem, operands[1]));
6199   DONE;
6200 })
6201
6202 (define_insn "neon_vst4qa<mode>"
6203   [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6204         (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6205                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6206                    UNSPEC_VST4A))]
6207   "TARGET_NEON"
6208 {
6209   int regno = REGNO (operands[1]);
6210   rtx ops[5];
6211   ops[0] = operands[0];
6212   ops[1] = gen_rtx_REG (DImode, regno);
6213   ops[2] = gen_rtx_REG (DImode, regno + 4);
6214   ops[3] = gen_rtx_REG (DImode, regno + 8);
6215   ops[4] = gen_rtx_REG (DImode, regno + 12);
6216   output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
6217   return "";
6218 }
6219   [(set_attr "type" "neon_store4_4reg<q>")]
6220 )
6221
6222 (define_insn "neon_vst4qb<mode>"
6223   [(set (match_operand:OI 0 "neon_struct_operand" "=Um")
6224         (unspec:OI [(match_operand:XI 1 "s_register_operand" "w")
6225                     (unspec:VQ2 [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6226                    UNSPEC_VST4B))]
6227   "TARGET_NEON"
6228 {
6229   int regno = REGNO (operands[1]);
6230   rtx ops[5];
6231   ops[0] = operands[0];
6232   ops[1] = gen_rtx_REG (DImode, regno + 2);
6233   ops[2] = gen_rtx_REG (DImode, regno + 6);
6234   ops[3] = gen_rtx_REG (DImode, regno + 10);
6235   ops[4] = gen_rtx_REG (DImode, regno + 14);
6236   output_asm_insn ("vst4.<V_sz_elem>\t{%P1, %P2, %P3, %P4}, %A0", ops);
6237   return "";
6238 }
6239   [(set_attr "type" "neon_store4_4reg<q>")]
6240 )
6241
6242 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6243 ;; here on big endian targets.
6244 (define_insn "neon_vst4_lane<mode>"
6245   [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6246         (unspec:<V_four_elem>
6247            [(match_operand:OI 1 "s_register_operand" "w")
6248             (match_operand:SI 2 "immediate_operand" "i")
6249             (unspec:VD_LANE [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6250            UNSPEC_VST4_LANE))]
6251   "TARGET_NEON"
6252 {
6253   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6254   int regno = REGNO (operands[1]);
6255   rtx ops[6];
6256   ops[0] = operands[0];
6257   ops[1] = gen_rtx_REG (DImode, regno);
6258   ops[2] = gen_rtx_REG (DImode, regno + 2);
6259   ops[3] = gen_rtx_REG (DImode, regno + 4);
6260   ops[4] = gen_rtx_REG (DImode, regno + 6);
6261   ops[5] = GEN_INT (lane);
6262   output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6263                    ops);
6264   return "";
6265 }
6266   [(set_attr "type" "neon_store4_one_lane<q>")]
6267 )
6268
6269 ;; see comment on neon_vld1_lane for reason why the lane numbers are reversed
6270 ;; here on big endian targets.
6271 (define_insn "neon_vst4_lane<mode>"
6272   [(set (match_operand:<V_four_elem> 0 "neon_struct_operand" "=Um")
6273         (unspec:<V_four_elem>
6274            [(match_operand:XI 1 "s_register_operand" "w")
6275             (match_operand:SI 2 "immediate_operand" "i")
6276             (unspec:VQ_HS [(const_int 0)] UNSPEC_VSTRUCTDUMMY)]
6277            UNSPEC_VST4_LANE))]
6278   "TARGET_NEON"
6279 {
6280   HOST_WIDE_INT lane = NEON_ENDIAN_LANE_N(<MODE>mode, INTVAL (operands[2]));
6281   HOST_WIDE_INT max = GET_MODE_NUNITS (<MODE>mode);
6282   int regno = REGNO (operands[1]);
6283   rtx ops[6];
6284   if (lane >= max / 2)
6285     {
6286       lane -= max / 2;
6287       regno += 2;
6288     }
6289   ops[0] = operands[0];
6290   ops[1] = gen_rtx_REG (DImode, regno);
6291   ops[2] = gen_rtx_REG (DImode, regno + 4);
6292   ops[3] = gen_rtx_REG (DImode, regno + 8);
6293   ops[4] = gen_rtx_REG (DImode, regno + 12);
6294   ops[5] = GEN_INT (lane);
6295   output_asm_insn ("vst4.<V_sz_elem>\t{%P1[%c5], %P2[%c5], %P3[%c5], %P4[%c5]}, %A0",
6296                    ops);
6297   return "";
6298 }
6299   [(set_attr "type" "neon_store4_4reg<q>")]
6300 )
6301
6302 (define_insn "neon_vec_unpack<US>_lo_<mode>"
6303   [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6304         (SE:<V_unpack> (vec_select:<V_HALF>
6305                           (match_operand:VU 1 "register_operand" "w")
6306                           (match_operand:VU 2 "vect_par_constant_low" ""))))]
6307   "TARGET_NEON && !BYTES_BIG_ENDIAN"
6308   "vmovl.<US><V_sz_elem> %q0, %e1"
6309   [(set_attr "type" "neon_shift_imm_long")]
6310 )
6311
6312 (define_insn "neon_vec_unpack<US>_hi_<mode>"
6313   [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6314         (SE:<V_unpack> (vec_select:<V_HALF>
6315                           (match_operand:VU 1 "register_operand" "w")
6316                           (match_operand:VU 2 "vect_par_constant_high" ""))))]
6317   "TARGET_NEON && !BYTES_BIG_ENDIAN"
6318   "vmovl.<US><V_sz_elem> %q0, %f1"
6319   [(set_attr "type" "neon_shift_imm_long")]
6320 )
6321
6322 (define_expand "vec_unpack<US>_hi_<mode>"
6323   [(match_operand:<V_unpack> 0 "register_operand")
6324    (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
6325  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6326   {
6327    rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
6328    rtx t1;
6329    int i;
6330    for (i = 0; i < (<V_mode_nunits>/2); i++)
6331      RTVEC_ELT (v, i) = GEN_INT ((<V_mode_nunits>/2) + i);
6332
6333    t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6334    emit_insn (gen_neon_vec_unpack<US>_hi_<mode> (operands[0],
6335                                                  operands[1],
6336                                                  t1));
6337    DONE;
6338   }
6339 )
6340
6341 (define_expand "vec_unpack<US>_lo_<mode>"
6342   [(match_operand:<V_unpack> 0 "register_operand")
6343    (SE:<V_unpack> (match_operand:VU 1 "register_operand"))]
6344  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6345   {
6346    rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
6347    rtx t1;
6348    int i;
6349    for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6350      RTVEC_ELT (v, i) = GEN_INT (i);
6351    t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6352    emit_insn (gen_neon_vec_unpack<US>_lo_<mode> (operands[0],
6353                                                  operands[1],
6354                                                  t1));
6355    DONE;
6356   }
6357 )
6358
6359 (define_insn "neon_vec_<US>mult_lo_<mode>"
6360  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6361        (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6362                            (match_operand:VU 1 "register_operand" "w")
6363                            (match_operand:VU 2 "vect_par_constant_low" "")))
6364                         (SE:<V_unpack> (vec_select:<V_HALF>
6365                            (match_operand:VU 3 "register_operand" "w")
6366                            (match_dup 2)))))]
6367   "TARGET_NEON && !BYTES_BIG_ENDIAN"
6368   "vmull.<US><V_sz_elem> %q0, %e1, %e3"
6369   [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6370 )
6371
6372 (define_expand "vec_widen_<US>mult_lo_<mode>"
6373   [(match_operand:<V_unpack> 0 "register_operand")
6374    (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6375    (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
6376  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6377  {
6378    rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
6379    rtx t1;
6380    int i;
6381    for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6382      RTVEC_ELT (v, i) = GEN_INT (i);
6383    t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6384
6385    emit_insn (gen_neon_vec_<US>mult_lo_<mode> (operands[0],
6386                                                operands[1],
6387                                                t1,
6388                                                operands[2]));
6389    DONE;
6390  }
6391 )
6392
6393 (define_insn "neon_vec_<US>mult_hi_<mode>"
6394  [(set (match_operand:<V_unpack> 0 "register_operand" "=w")
6395       (mult:<V_unpack> (SE:<V_unpack> (vec_select:<V_HALF>
6396                             (match_operand:VU 1 "register_operand" "w")
6397                             (match_operand:VU 2 "vect_par_constant_high" "")))
6398                        (SE:<V_unpack> (vec_select:<V_HALF>
6399                             (match_operand:VU 3 "register_operand" "w")
6400                             (match_dup 2)))))]
6401   "TARGET_NEON && !BYTES_BIG_ENDIAN"
6402   "vmull.<US><V_sz_elem> %q0, %f1, %f3"
6403   [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6404 )
6405
6406 (define_expand "vec_widen_<US>mult_hi_<mode>"
6407   [(match_operand:<V_unpack> 0 "register_operand")
6408    (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6409    (SE:<V_unpack> (match_operand:VU 2 "register_operand"))]
6410  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6411  {
6412    rtvec v = rtvec_alloc (<V_mode_nunits>/2)  ;
6413    rtx t1;
6414    int i;
6415    for (i = 0; i < (<V_mode_nunits>/2) ; i++)
6416      RTVEC_ELT (v, i) = GEN_INT (<V_mode_nunits>/2 + i);
6417    t1 = gen_rtx_PARALLEL (<MODE>mode, v);
6418
6419    emit_insn (gen_neon_vec_<US>mult_hi_<mode> (operands[0],
6420                                                operands[1],
6421                                                t1,
6422                                                operands[2]));
6423    DONE;
6424
6425  }
6426 )
6427
6428 (define_insn "neon_vec_<US>shiftl_<mode>"
6429  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6430        (SE:<V_widen> (ashift:VW (match_operand:VW 1 "register_operand" "w")
6431        (match_operand:<V_innermode> 2 "const_neon_scalar_shift_amount_operand" ""))))]
6432   "TARGET_NEON"
6433 {
6434   return "vshll.<US><V_sz_elem> %q0, %P1, %2";
6435 }
6436   [(set_attr "type" "neon_shift_imm_long")]
6437 )
6438
6439 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6440   [(match_operand:<V_unpack> 0 "register_operand")
6441    (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6442    (match_operand:SI 2 "immediate_operand")]
6443  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6444  {
6445   emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6446                 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode, 0),
6447                 operands[2]));
6448    DONE;
6449  }
6450 )
6451
6452 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6453   [(match_operand:<V_unpack> 0 "register_operand")
6454    (SE:<V_unpack> (match_operand:VU 1 "register_operand"))
6455    (match_operand:SI 2 "immediate_operand")]
6456  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6457  {
6458   emit_insn (gen_neon_vec_<US>shiftl_<V_half> (operands[0],
6459                 simplify_gen_subreg (<V_HALF>mode, operands[1], <MODE>mode,
6460                                      GET_MODE_SIZE (<V_HALF>mode)),
6461                 operands[2]));
6462    DONE;
6463  }
6464 )
6465
6466 ;; Vectorize for non-neon-quad case
6467 (define_insn "neon_unpack<US>_<mode>"
6468  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6469        (SE:<V_widen> (match_operand:VDI 1 "register_operand" "w")))]
6470  "TARGET_NEON"
6471  "vmovl.<US><V_sz_elem> %q0, %P1"
6472   [(set_attr "type" "neon_move")]
6473 )
6474
6475 (define_expand "vec_unpack<US>_lo_<mode>"
6476  [(match_operand:<V_double_width> 0 "register_operand")
6477   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6478  "TARGET_NEON"
6479 {
6480   rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6481   emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6482   emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6483
6484   DONE;
6485 }
6486 )
6487
6488 (define_expand "vec_unpack<US>_hi_<mode>"
6489  [(match_operand:<V_double_width> 0 "register_operand")
6490   (SE:<V_double_width>(match_operand:VDI 1 "register_operand"))]
6491  "TARGET_NEON"
6492 {
6493   rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6494   emit_insn (gen_neon_unpack<US>_<mode> (tmpreg, operands[1]));
6495   emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6496
6497   DONE;
6498 }
6499 )
6500
6501 (define_insn "neon_vec_<US>mult_<mode>"
6502  [(set (match_operand:<V_widen> 0 "register_operand" "=w")
6503        (mult:<V_widen> (SE:<V_widen>
6504                            (match_operand:VDI 1 "register_operand" "w"))
6505                        (SE:<V_widen>
6506                            (match_operand:VDI 2 "register_operand" "w"))))]
6507   "TARGET_NEON"
6508   "vmull.<US><V_sz_elem> %q0, %P1, %P2"
6509   [(set_attr "type" "neon_mul_<V_elem_ch>_long")]
6510 )
6511
6512 (define_expand "vec_widen_<US>mult_hi_<mode>"
6513   [(match_operand:<V_double_width> 0 "register_operand")
6514    (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6515    (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
6516  "TARGET_NEON"
6517  {
6518    rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6519    emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6520    emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6521
6522    DONE;
6523
6524  }
6525 )
6526
6527 (define_expand "vec_widen_<US>mult_lo_<mode>"
6528   [(match_operand:<V_double_width> 0 "register_operand")
6529    (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6530    (SE:<V_double_width> (match_operand:VDI 2 "register_operand"))]
6531  "TARGET_NEON"
6532  {
6533    rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6534    emit_insn (gen_neon_vec_<US>mult_<mode> (tmpreg, operands[1], operands[2]));
6535    emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6536
6537    DONE;
6538
6539  }
6540 )
6541
6542 (define_expand "vec_widen_<US>shiftl_hi_<mode>"
6543  [(match_operand:<V_double_width> 0 "register_operand")
6544    (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6545    (match_operand:SI 2 "immediate_operand")]
6546  "TARGET_NEON"
6547  {
6548    rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6549    emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6550    emit_insn (gen_neon_vget_high<V_widen_l> (operands[0], tmpreg));
6551
6552    DONE;
6553  }
6554 )
6555
6556 (define_expand "vec_widen_<US>shiftl_lo_<mode>"
6557   [(match_operand:<V_double_width> 0 "register_operand")
6558    (SE:<V_double_width> (match_operand:VDI 1 "register_operand"))
6559    (match_operand:SI 2 "immediate_operand")]
6560  "TARGET_NEON"
6561  {
6562    rtx tmpreg = gen_reg_rtx (<V_widen>mode);
6563    emit_insn (gen_neon_vec_<US>shiftl_<mode> (tmpreg, operands[1], operands[2]));
6564    emit_insn (gen_neon_vget_low<V_widen_l> (operands[0], tmpreg));
6565
6566    DONE;
6567  }
6568 )
6569
6570 ; FIXME: These instruction patterns can't be used safely in big-endian mode
6571 ; because the ordering of vector elements in Q registers is different from what
6572 ; the semantics of the instructions require.
6573
6574 (define_insn "vec_pack_trunc_<mode>"
6575  [(set (match_operand:<V_narrow_pack> 0 "register_operand" "=&w")
6576        (vec_concat:<V_narrow_pack>
6577                 (truncate:<V_narrow>
6578                         (match_operand:VN 1 "register_operand" "w"))
6579                 (truncate:<V_narrow>
6580                         (match_operand:VN 2 "register_operand" "w"))))]
6581  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6582  "vmovn.i<V_sz_elem>\t%e0, %q1\;vmovn.i<V_sz_elem>\t%f0, %q2"
6583  [(set_attr "type" "multiple")
6584   (set_attr "length" "8")]
6585 )
6586
6587 ;; For the non-quad case.
6588 (define_insn "neon_vec_pack_trunc_<mode>"
6589  [(set (match_operand:<V_narrow> 0 "register_operand" "=w")
6590        (truncate:<V_narrow> (match_operand:VN 1 "register_operand" "w")))]
6591  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6592  "vmovn.i<V_sz_elem>\t%P0, %q1"
6593  [(set_attr "type" "neon_move_narrow_q")]
6594 )
6595
6596 (define_expand "vec_pack_trunc_<mode>"
6597  [(match_operand:<V_narrow_pack> 0 "register_operand")
6598   (match_operand:VSHFT 1 "register_operand")
6599   (match_operand:VSHFT 2 "register_operand")]
6600  "TARGET_NEON && !BYTES_BIG_ENDIAN"
6601 {
6602   rtx tempreg = gen_reg_rtx (<V_DOUBLE>mode);
6603
6604   emit_insn (gen_move_lo_quad_<V_double> (tempreg, operands[1]));
6605   emit_insn (gen_move_hi_quad_<V_double> (tempreg, operands[2]));
6606   emit_insn (gen_neon_vec_pack_trunc_<V_double> (operands[0], tempreg));
6607   DONE;
6608 })
6609
6610 (define_insn "neon_vabd<mode>_2"
6611  [(set (match_operand:VF 0 "s_register_operand" "=w")
6612        (abs:VF (minus:VF (match_operand:VF 1 "s_register_operand" "w")
6613                          (match_operand:VF 2 "s_register_operand" "w"))))]
6614  "TARGET_NEON && flag_unsafe_math_optimizations"
6615  "vabd.<V_s_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6616  [(set_attr "type" "neon_fp_abd_s<q>")]
6617 )
6618
6619 (define_insn "neon_vabd<mode>_3"
6620  [(set (match_operand:VF 0 "s_register_operand" "=w")
6621        (abs:VF (unspec:VF [(match_operand:VF 1 "s_register_operand" "w")
6622                             (match_operand:VF 2 "s_register_operand" "w")]
6623                 UNSPEC_VSUB)))]
6624  "TARGET_NEON && flag_unsafe_math_optimizations"
6625  "vabd.<V_if_elem> %<V_reg>0, %<V_reg>1, %<V_reg>2"
6626  [(set_attr "type" "neon_fp_abd_s<q>")]
6627 )