Daily bump.
[gcc.git] / gcc / config / nvptx / nvptx.md
1 ;; Machine description for NVPTX.
2 ;; Copyright (C) 2014-2021 Free Software Foundation, Inc.
3 ;; Contributed by Bernd Schmidt <bernds@codesourcery.com>
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
;; Codes for the (unspec ...) expressions of this port.  Within the part of
;; the file reviewed here they are referenced by, for example,
;; load_arg_reg<mode> (UNSPEC_ARG_REG), copysign<mode>3 (UNSPEC_COPYSIGN),
;; bitrev<mode>2 (UNSPEC_BITREV), the sin/cos patterns and the br_*_uni
;; branches (UNSPEC_BR_UNIFIED).
21 (define_c_enum "unspec" [
22 UNSPEC_ARG_REG
23
24 UNSPEC_COPYSIGN
25 UNSPEC_LOG2
26 UNSPEC_EXP2
27 UNSPEC_SIN
28 UNSPEC_COS
29
30 UNSPEC_FPINT_FLOOR
31 UNSPEC_FPINT_BTRUNC
32 UNSPEC_FPINT_CEIL
33 UNSPEC_FPINT_NEARBYINT
34
35 UNSPEC_BITREV
36
37 UNSPEC_ALLOCA
38
39 UNSPEC_SET_SOFTSTACK
40
41 UNSPEC_DIM_SIZE
42
43 UNSPEC_BIT_CONV
44
45 UNSPEC_VOTE_BALLOT
46
47 UNSPEC_LANEID
48
49 UNSPEC_SHUFFLE
50 UNSPEC_BR_UNIFIED
51 ])
52
;; Members of the "unspecv" enum.  None of them is referenced in the part of
;; the file reviewed here; presumably they label unspec_volatile patterns
;; defined further down -- TODO confirm.
53 (define_c_enum "unspecv" [
54 UNSPECV_LOCK
55 UNSPECV_CAS
56 UNSPECV_XCHG
57 UNSPECV_BARSYNC
58 UNSPECV_MEMBAR
59 UNSPECV_MEMBAR_CTA
60 UNSPECV_DIM_POS
61
62 UNSPECV_FORK
63 UNSPECV_FORKED
64 UNSPECV_JOINING
65 UNSPECV_JOIN
66
67 UNSPECV_NOUNROLL
68
69 UNSPECV_SIMT_ENTER
70 UNSPECV_SIMT_EXIT
71
72 UNSPECV_RED_PART
73 ])
74
;; Insn attribute "subregs_ok", defaulting to "false".  The move, extension
;; and truncation patterns below set it to "true".  The consumer of the
;; attribute is not part of this file section.
75 (define_attr "subregs_ok" "false,true"
76 (const_string "false"))
77
;; Insn attribute "atomic", defaulting to "false".  No pattern in the part of
;; the file reviewed here overrides the default.
78 (define_attr "atomic" "false,true"
79 (const_string "false"))
80
81 ;; The nvptx operand predicates, in general, don't permit subregs and
82 ;; accept only literal constants.  This differs from the generic ones, which
83 ;; permit subregs and symbolic constants (as appropriate).
;; Accepts only an rtx whose code is REG (a SUBREG is rejected by the
;; match_code list) and which the generic register_operand accepts for MODE.
84 (define_predicate "nvptx_register_operand"
85 (match_code "reg")
86 {
87 return register_operand (op, mode);
88 })
89
;; A MEM or a REG; any other code, SUBREG included, is rejected by the
;; match_code list.  The generic predicate for the matching kind then
;; validates the operand against MODE.
(define_predicate "nvptx_nonimmediate_operand"
  (match_code "mem,reg")
{
  if (REG_P (op))
    return register_operand (op, mode);

  return memory_operand (op, mode);
})
96
;; A REG, CONST_INT or CONST_DOUBLE; any other code is rejected by the
;; match_code list.  Registers are checked with register_operand, constants
;; with immediate_operand.
(define_predicate "nvptx_nonmemory_operand"
  (match_code "reg,const_int,const_double")
{
  if (REG_P (op))
    return register_operand (op, mode);

  return immediate_operand (op, mode);
})
103
;; Matches exactly the shared CONST_INT zero (const0_rtx).
104 (define_predicate "const0_operand"
105 (and (match_code "const_int")
106 (match_test "op == const0_rtx")))
107
108 ;; True if this operator is valid for predication.
;; Only EQ and NE qualify; used by the define_cond_exec and by cbranchbi4.
109 (define_predicate "predicate_operator"
110 (match_code "eq,ne"))
111
;; Matches NE only; used by cstorebi4.
112 (define_predicate "ne_operator"
113 (match_code "ne"))
114
;; The signed and unsigned integer comparison codes accepted by the integer
;; compare, set and branch patterns.
115 (define_predicate "nvptx_comparison_operator"
116 (match_code "eq,ne,le,ge,lt,gt,leu,geu,ltu,gtu"))
117
;; Comparison codes accepted by the floating-point patterns: the ordered
;; codes, the UN* variants, and the ORDERED/UNORDERED tests themselves.
118 (define_predicate "nvptx_float_comparison_operator"
119 (match_code "eq,ne,le,ge,lt,gt,uneq,unle,unge,unlt,ungt,unordered,ordered"))
120
;; A CONST_INT whose value, read as unsigned, is below 4 (so 0..3; negative
;; values fail because UINTVAL makes them large).
121 (define_predicate "nvptx_vector_index_operand"
122 (and (match_code "const_int")
123 (match_test "UINTVAL (op) < 4")))
124
125 ;; Test for a valid operand for a call instruction.
;; Any REG is accepted; a SYMBOL_REF is accepted only when it is flagged as
;; a function symbol.
126 (define_predicate "call_insn_operand"
127 (match_code "symbol_ref,reg")
128 {
129 return REG_P (op) || SYMBOL_REF_FUNCTION_P (op);
130 })
131
;; Predicate for the PARALLEL wrapped around a call.  Element 0 is left for
;; the enclosing pattern to match; every further element has to be a USE of
;; a register (the argument pseudos).
(define_predicate "call_operation"
  (match_code "parallel")
{
  const int n_elts = XVECLEN (op, 0);
  int idx = 1;

  while (idx < n_elts)
    {
      rtx use = XVECEXP (op, 0, idx);

      /* Anything other than (use (reg ...)) disqualifies the PARALLEL.  */
      if (!(GET_CODE (use) == USE && REG_P (XEXP (use, 0))))
        return false;

      idx++;
    }

  return true;
})
148
149 ;; Test for a function symbol ref operand
;; Used by the define_split that breaks up "function symbol + offset" moves.
150 (define_predicate "symbol_ref_function_operand"
151 (match_code "symbol_ref")
152 {
153 return SYMBOL_REF_FUNCTION_P (op);
154 })
155
;; Insn attribute "predicable".  Note the default is "true": patterns opt out
;; explicitly, as the conditional branch patterns below do.
156 (define_attr "predicable" "false,true"
157 (const_string "true"))
158
;; Conditional-execution template: the guard is an EQ or NE test (see
;; predicate_operator) of a BImode register against constant zero.  Both the
;; extra condition and the output template are empty strings.
159 (define_cond_exec
160 [(match_operator 0 "predicate_operator"
161 [(match_operand:BI 1 "nvptx_register_operand" "")
162 (match_operand:BI 2 "const0_operand" "")])]
163 ""
164 ""
165 )
166
;; Operand constraints.  P0/P1/Pn select the single integers 0, 1 and -1;
;; movbi uses P0 and Pn for its constant alternatives.
167 (define_constraint "P0"
168 "An integer with the value 0."
169 (and (match_code "const_int")
170 (match_test "ival == 0")))
171
172 (define_constraint "P1"
173 "An integer with the value 1."
174 (and (match_code "const_int")
175 (match_test "ival == 1")))
176
177 (define_constraint "Pn"
178 "An integer with the value -1."
179 (and (match_code "const_int")
180 (match_test "ival == -1")))
181
;; NOTE(review): the description says "pseudo register", but the test only
;; checks for rtx code REG.
182 (define_constraint "R"
183 "A pseudo register."
184 (match_code "reg"))
185
;; The match_test is the constant "true", so every CONST_INT satisfies Ia.
186 (define_constraint "Ia"
187 "Any integer constant."
188 (and (match_code "const_int") (match_test "true")))
189
;; Mode iterators.  The names spell out the member modes: B = BI, Q = QI,
;; H = HI, S/D followed by I = SI/DI, S/D followed by F = SF/DF, SDC = SC/DC.
;; BITS and BITD pair the 32-bit (SI, SF) and 64-bit (DI, DF) modes; VECIM
;; is the two-element integer vector modes.
190 (define_mode_iterator QHSDISDFM [QI HI SI DI SF DF])
191 (define_mode_iterator QHSDIM [QI HI SI DI])
192 (define_mode_iterator HSDIM [HI SI DI])
193 (define_mode_iterator BHSDIM [BI HI SI DI])
194 (define_mode_iterator SDIM [SI DI])
195 (define_mode_iterator SDISDFM [SI DI SF DF])
196 (define_mode_iterator QHIM [QI HI])
197 (define_mode_iterator QHSIM [QI HI SI])
198 (define_mode_iterator SDFM [SF DF])
199 (define_mode_iterator SDCM [SC DC])
200 (define_mode_iterator BITS [SI SF])
201 (define_mode_iterator BITD [DI DF])
202 (define_mode_iterator VECIM [V2SI V2DI])
203
204 ;; This mode iterator allows :P to be used for patterns that operate on
205 ;; pointer-sized quantities. Exactly one of the two alternatives will match.
;; In this part of the file it gives the mode of the callee address in
;; call_insn_<mode> and call_value_insn_<mode>.
206 (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
207
208 ;; Define element mode for each vector mode.
;; VECELEM yields the upper-case spelling and Vecelem the lower-case one.
;; Neither is used in the part of the file reviewed here.
209 (define_mode_attr VECELEM [(V2SI "SI") (V2DI "DI")])
210 (define_mode_attr Vecelem [(V2SI "si") (V2DI "di")])
211
212 ;; We should get away with not defining memory alternatives, since we don't
213 ;; get variables in this mode and pseudos are never spilled.
;; Three alternatives, selected by the source constraint:
;;   R  - register copy, emitted as mov;
;;   P0 - constant 0, emitted as "setp.eq.u32 %0, 1, 0" (1 == 0);
;;   Pn - constant -1, emitted as "setp.eq.u32 %0, 1, 1" (1 == 1).
214 (define_insn "movbi"
215 [(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R")
216 (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,Pn"))]
217 ""
218 "@
219 %.\\tmov%t0\\t%0, %1;
220 %.\\tsetp.eq.u32\\t%0, 1, 0;
221 %.\\tsetp.eq.u32\\t%0, 1, 1;")
222
;; Move for the V2SI/V2DI vector modes.  Alternative 0 is register or
;; immediate to register, alternative 1 a load and alternative 2 a store.
;; The insn condition rejects a store whose source is not a register.
(define_insn "*mov<mode>_insn"
  [(set (match_operand:VECIM 0 "nonimmediate_operand" "=R,R,m")
        (match_operand:VECIM 1 "general_operand" "Ri,m,R"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
{
  switch (which_alternative)
    {
    case 1:
      /* Memory source.  */
      return "%.\\tld%A1%u1\\t%0, %1;";

    case 2:
      /* Memory destination.  */
      return "%.\\tst%A0%u0\\t%0, %1;";

    default:
      /* Register destination with a register or immediate source.  */
      return nvptx_output_mov_insn (operands[0], operands[1]);
    }
}
  [(set_attr "subregs_ok" "true")])
236
;; Move for the scalar integer modes QI/HI/SI/DI.  Alternative 0 is register
;; or immediate to register, alternative 1 a load and alternative 2 a store.
;; The insn condition rejects a store whose source is not a register.
(define_insn "*mov<mode>_insn"
  [(set (match_operand:QHSDIM 0 "nonimmediate_operand" "=R,R,m")
        (match_operand:QHSDIM 1 "general_operand" "Ri,m,R"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
{
  switch (which_alternative)
    {
    case 1:
      /* Memory source.  */
      return "%.\\tld%A1%u1\\t%0, %1;";

    case 2:
      /* Memory destination.  */
      return "%.\\tst%A0%u0\\t%0, %1;";

    default:
      /* Register destination with a register or immediate source.  */
      return nvptx_output_mov_insn (operands[0], operands[1]);
    }
}
  [(set_attr "subregs_ok" "true")])
250
251 ;; ptxas segfaults on 'mov.u64 %r24,bar+4096', so break it up.
;; Matches a DImode set of (const (plus function-symbol const_int)) and
;; replaces it with two insns: load the symbol into operand 0, then add the
;; offset to operand 0 in place.
252 (define_split
253 [(set (match_operand:DI 0 "nvptx_register_operand")
254 (const:DI (plus:DI (match_operand:DI 1 "symbol_ref_function_operand")
255 (match_operand 2 "const_int_operand"))))]
256 ""
257 [(set (match_dup 0) (match_dup 1))
258 (set (match_dup 0) (plus:DI (match_dup 0) (match_dup 2)))
259 ]
260 "")
261
;; Move for the floating-point modes SF/DF.  Alternative 0 is register or
;; floating constant to register, alternative 1 a load and alternative 2 a
;; store.  The insn condition rejects a store whose source is not a register.
(define_insn "*mov<mode>_insn"
  [(set (match_operand:SDFM 0 "nonimmediate_operand" "=R,R,m")
        (match_operand:SDFM 1 "general_operand" "RF,m,R"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
{
  /* NOTE(review): the integer and vector variants of this pattern use %u1
     for the load and %u0 for the store; here the operand numbers after %u
     are the other way round.  Both operands share one mode, so the output
     is presumably the same -- confirm against the operand printer.  */
  switch (which_alternative)
    {
    case 1:
      /* Memory source.  */
      return "%.\\tld%A1%u0\\t%0, %1;";

    case 2:
      /* Memory destination.  */
      return "%.\\tst%A0%u1\\t%0, %1;";

    default:
      /* Register destination with a register or constant source.  */
      return nvptx_output_mov_insn (operands[0], operands[1]);
    }
}
  [(set_attr "subregs_ok" "true")])
275
;; Load of an incoming argument register into a QI/HI register.  Operand 1
;; is a constant that becomes the numeric suffix of "%ar" in the output;
;; the value is converted from .u32 with cvt.
276 (define_insn "load_arg_reg<mode>"
277 [(set (match_operand:QHIM 0 "nvptx_register_operand" "=R")
278 (unspec:QHIM [(match_operand 1 "const_int_operand" "n")]
279 UNSPEC_ARG_REG))]
280 ""
281 "%.\\tcvt%t0.u32\\t%0, %%ar%1;")
282
;; Load of an incoming argument register for the SI/DI/SF/DF modes: a plain
;; mov from "%ar<operand 1>", with no conversion.
283 (define_insn "load_arg_reg<mode>"
284 [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R")
285 (unspec:SDISDFM [(match_operand 1 "const_int_operand" "n")]
286 UNSPEC_ARG_REG))]
287 ""
288 "%.\\tmov%t0\\t%0, %%ar%1;")
289
;; Move expander for the V2SI/V2DI vector modes.  A store needs its source
;; in a register, so any other source is first copied into a fresh pseudo.
;; All remaining cases fall through to the pattern above.
(define_expand "mov<mode>"
  [(set (match_operand:VECIM 0 "nonimmediate_operand" "")
        (match_operand:VECIM 1 "general_operand" ""))]
  ""
{
  rtx dest = operands[0];
  rtx src = operands[1];

  if (MEM_P (dest) && !REG_P (src))
    {
      rtx staged = gen_reg_rtx (<MODE>mode);

      emit_move_insn (staged, src);
      emit_move_insn (dest, staged);
      DONE;
    }
})
303
;; Move expander for the scalar integer and floating-point modes.  A store
;; needs its source in a register, so any other source is first copied into
;; a fresh pseudo.  Otherwise a LABEL_REF source is reported through sorry ()
;; and expansion continues with the pattern above.
(define_expand "mov<mode>"
  [(set (match_operand:QHSDISDFM 0 "nonimmediate_operand" "")
        (match_operand:QHSDISDFM 1 "general_operand" ""))]
  ""
{
  rtx dest = operands[0];
  rtx src = operands[1];

  if (MEM_P (dest) && !REG_P (src))
    {
      rtx staged = gen_reg_rtx (<MODE>mode);

      emit_move_insn (staged, src);
      emit_move_insn (dest, staged);
      DONE;
    }

  if (GET_CODE (src) == LABEL_REF)
    sorry ("target cannot support label values");
})
320
;; Zero extensions.  Each pattern has two alternatives: a register source,
;; emitted as cvt, and a memory source, emitted as an unsigned ld.

;; QI -> HI.
321 (define_insn "zero_extendqihi2"
322 [(set (match_operand:HI 0 "nvptx_register_operand" "=R,R")
323 (zero_extend:HI (match_operand:QI 1 "nvptx_nonimmediate_operand" "R,m")))]
324 ""
325 "@
326 %.\\tcvt.u16.u%T1\\t%0, %1;
327 %.\\tld%A1.u8\\t%0, %1;"
328 [(set_attr "subregs_ok" "true")])
329
;; QI/HI -> SI.
330 (define_insn "zero_extend<mode>si2"
331 [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
332 (zero_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
333 ""
334 "@
335 %.\\tcvt.u32.u%T1\\t%0, %1;
336 %.\\tld%A1.u%T1\\t%0, %1;"
337 [(set_attr "subregs_ok" "true")])
338
;; QI/HI/SI -> DI.  The load alternative spells its type with %u1 where the
;; SI variant above writes ".u%T1".
339 (define_insn "zero_extend<mode>di2"
340 [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
341 (zero_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
342 ""
343 "@
344 %.\\tcvt.u64.u%T1\\t%0, %1;
345 %.\\tld%A1%u1\\t%0, %1;"
346 [(set_attr "subregs_ok" "true")])
347
;; Sign extensions.

;; QI -> HI.  Register source only; unlike zero_extendqihi2 there is no
;; memory alternative.
348 (define_insn "extendqihi2"
349 [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
350 (sign_extend:HI (match_operand:QI 1 "nvptx_register_operand" "R")))]
351 ""
352 "%.\\tcvt.s16.s8\\t%0, %1;"
353 [(set_attr "subregs_ok" "true")])
354
;; QI/HI -> SI, from a register (cvt) or from memory (signed ld).
355 (define_insn "extend<mode>si2"
356 [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
357 (sign_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
358 ""
359 "@
360 %.\\tcvt.s32.s%T1\\t%0, %1;
361 %.\\tld%A1.s%T1\\t%0, %1;"
362 [(set_attr "subregs_ok" "true")])
363
;; QI/HI/SI -> DI, from a register (cvt) or from memory (signed ld).
364 (define_insn "extend<mode>di2"
365 [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
366 (sign_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
367 ""
368 "@
369 %.\\tcvt.s64.s%T1\\t%0, %1;
370 %.\\tld%A1.s%T1\\t%0, %1;"
371 [(set_attr "subregs_ok" "true")])
372
;; Truncate HI to QI.  The destination is either a register (cvt from .u16)
;; or memory (a .u8 store of the HI register).
373 (define_insn "trunchiqi2"
374 [(set (match_operand:QI 0 "nvptx_nonimmediate_operand" "=R,m")
375 (truncate:QI (match_operand:HI 1 "nvptx_register_operand" "R,R")))]
376 ""
377 "@
378 %.\\tcvt%t0.u16\\t%0, %1;
379 %.\\tst%A0.u8\\t%0, %1;"
380 [(set_attr "subregs_ok" "true")])
381
;; Truncate SI to QI or HI.  A memory destination is handled with a store.
;; For a register destination, QImode is emitted as a mov and HImode as a
;; cvt from .u32.
(define_insn "truncsi<mode>2"
  [(set (match_operand:QHIM 0 "nvptx_nonimmediate_operand" "=R,m")
        (truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))]
  ""
{
  /* Alternative 1 is the memory destination.  */
  if (which_alternative == 1)
    return "%.\\tst%A0.u%T0\\t%0, %1;";

  return (GET_MODE (operands[0]) == QImode
          ? "%.\\tmov%t0\\t%0, %1;"
          : "%.\\tcvt%t0.u32\\t%0, %1;");
}
  [(set_attr "subregs_ok" "true")])
394
;; Truncate DI to QI/HI/SI.  The destination is either a register (cvt from
;; .u64) or memory (a store of the destination width).
395 (define_insn "truncdi<mode>2"
396 [(set (match_operand:QHSIM 0 "nvptx_nonimmediate_operand" "=R,m")
397 (truncate:QHSIM (match_operand:DI 1 "nvptx_register_operand" "R,R")))]
398 ""
399 "@
400 %.\\tcvt%t0.u64\\t%0, %1;
401 %.\\tst%A0.u%T0\\t%0, %1;"
402 [(set_attr "subregs_ok" "true")])
403
404 ;; Integer arithmetic
405
;; Integer addition for HI/SI/DI.  Operand 2 may be a register or an
;; immediate.
406 (define_insn "add<mode>3"
407 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
408 (plus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
409 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
410 ""
411 "%.\\tadd%t0\\t%0, %1, %2;")
412
;; Three-input SImode forms, all operands in registers.
;; (op1 + op2) + op3, emitted as a single vadd with the ".add" suffix.
413 (define_insn "*vadd_addsi4"
414 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
415 (plus:SI (plus:SI (match_operand:SI 1 "nvptx_register_operand" "R")
416 (match_operand:SI 2 "nvptx_register_operand" "R"))
417 (match_operand:SI 3 "nvptx_register_operand" "R")))]
418 ""
419 "%.\\tvadd%t0%t1%t2.add\\t%0, %1, %2, %3;")
420
;; (op1 - op2) + op3, emitted as a single vsub with the ".add" suffix.
421 (define_insn "*vsub_addsi4"
422 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
423 (plus:SI (minus:SI (match_operand:SI 1 "nvptx_register_operand" "R")
424 (match_operand:SI 2 "nvptx_register_operand" "R"))
425 (match_operand:SI 3 "nvptx_register_operand" "R")))]
426 ""
427 "%.\\tvsub%t0%t1%t2.add\\t%0, %1, %2, %3;")
428
;; Integer subtraction for HI/SI/DI.  Both inputs must be registers; no
;; immediate alternative is provided.
429 (define_insn "sub<mode>3"
430 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
431 (minus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
432 (match_operand:HSDIM 2 "nvptx_register_operand" "R")))]
433 ""
434 "%.\\tsub%t0\\t%0, %1, %2;")
435
;; Same-width integer multiply, emitted as mul.lo.
436 (define_insn "mul<mode>3"
437 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
438 (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
439 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
440 ""
441 "%.\\tmul.lo%t0\\t%0, %1, %2;")
442
;; Multiply-add, op1 * op2 + op3, emitted as mad.lo.  The multiplier and the
;; addend may each be a register or an immediate.
443 (define_insn "*mad<mode>3"
444 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
445 (plus:HSDIM (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
446 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri"))
447 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
448 ""
449 "%.\\tmad.lo%t0\\t%0, %1, %2, %3;")
450
;; Signed integer division (div.s<bits>).
451 (define_insn "div<mode>3"
452 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
453 (div:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
454 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
455 ""
456 "%.\\tdiv.s%T0\\t%0, %1, %2;")
457
;; Unsigned integer division (div.u<bits>).
458 (define_insn "udiv<mode>3"
459 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
460 (udiv:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
461 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
462 ""
463 "%.\\tdiv.u%T0\\t%0, %1, %2;")
464
;; Signed integer remainder (rem.s<bits>).  Operand 1's predicate accepts
;; only a REG, so its constraint is "R", as in div<mode>3.  The divisor may
;; be a register or an immediate.
(define_insn "mod<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (mod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                   (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\trem.s%T0\\t%0, %1, %2;")
471
;; Unsigned integer remainder (rem.u<bits>).  Operand 1's predicate accepts
;; only a REG, so its constraint is "R", as in udiv<mode>3.  The divisor may
;; be a register or an immediate.
(define_insn "umod<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (umod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\trem.u%T0\\t%0, %1, %2;")
478
;; Integer minimum and maximum for HI/SI/DI.  The signed forms emit
;; min.s/max.s and the unsigned forms min.u/max.u; operand 2 may be a
;; register or an immediate.
479 (define_insn "smin<mode>3"
480 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
481 (smin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
482 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
483 ""
484 "%.\\tmin.s%T0\\t%0, %1, %2;")
485
486 (define_insn "umin<mode>3"
487 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
488 (umin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
489 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
490 ""
491 "%.\\tmin.u%T0\\t%0, %1, %2;")
492
493 (define_insn "smax<mode>3"
494 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
495 (smax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
496 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
497 ""
498 "%.\\tmax.s%T0\\t%0, %1, %2;")
499
500 (define_insn "umax<mode>3"
501 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
502 (umax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
503 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
504 ""
505 "%.\\tmax.u%T0\\t%0, %1, %2;")
506
;; Integer unary operations for HI/SI/DI, register to register.
;; Absolute value (abs.s<bits>).
507 (define_insn "abs<mode>2"
508 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
509 (abs:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
510 ""
511 "%.\\tabs.s%T0\\t%0, %1;")
512
;; Negation (neg.s<bits>).
513 (define_insn "neg<mode>2"
514 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
515 (neg:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
516 ""
517 "%.\\tneg.s%T0\\t%0, %1;")
518
;; Bitwise complement (not.b<bits>).
519 (define_insn "one_cmpl<mode>2"
520 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
521 (not:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
522 ""
523 "%.\\tnot.b%T0\\t%0, %1;")
524
;; Bit reversal for SI/DI, represented as UNSPEC_BITREV and emitted as brev.
;; The ctz<mode>2 expander uses it as its first step.
525 (define_insn "bitrev<mode>2"
526 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
527 (unspec:SDIM [(match_operand:SDIM 1 "nvptx_register_operand" "R")]
528 UNSPEC_BITREV))]
529 ""
530 "%.\\tbrev.b%T0\\t%0, %1;")
531
;; Count leading zeros of an SI or DI register.  The result is always
;; SImode, and the width in the mnemonic comes from the source (operand 1).
532 (define_insn "clz<mode>2"
533 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
534 (clz:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
535 ""
536 "%.\\tclz.b%T1\\t%0, %1;")
537
;; Count trailing zeros, expanded as a bit reversal followed by a
;; count-leading-zeros.  The reversed value lives in a fresh pseudo of the
;; source mode; the result is SImode.
(define_expand "ctz<mode>2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "")
        (ctz:SI (match_operand:SDIM 1 "nvptx_register_operand" "")))]
  ""
{
  rtx reversed = gen_reg_rtx (<MODE>mode);

  /* ctz (x) is computed as clz (bitrev (x)).  */
  emit_insn (gen_bitrev<mode>2 (reversed, operands[1]));
  emit_insn (gen_clz<mode>2 (operands[0], reversed));
  DONE;
})
548
;; Population count of an SI or DI register.  The result is always SImode,
;; and the width in the mnemonic comes from the source (operand 1).
549 (define_insn "popcount<mode>2"
550 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
551 (popcount:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
552 ""
553 "%.\\tpopc.b%T1\\t%0, %1;")
554
555 ;; Multiplication variants
556
;; Widening multiplies: both inputs are extended (sign or zero) to twice
;; their width and the full product is produced with mul.wide.  All operands
;; are registers.

;; Signed HI x HI -> SI.
557 (define_insn "mulhisi3"
558 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
559 (mult:SI (sign_extend:SI
560 (match_operand:HI 1 "nvptx_register_operand" "R"))
561 (sign_extend:SI
562 (match_operand:HI 2 "nvptx_register_operand" "R"))))]
563 ""
564 "%.\\tmul.wide.s16\\t%0, %1, %2;")
565
;; Signed SI x SI -> DI.
566 (define_insn "mulsidi3"
567 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
568 (mult:DI (sign_extend:DI
569 (match_operand:SI 1 "nvptx_register_operand" "R"))
570 (sign_extend:DI
571 (match_operand:SI 2 "nvptx_register_operand" "R"))))]
572 ""
573 "%.\\tmul.wide.s32\\t%0, %1, %2;")
574
;; Unsigned HI x HI -> SI.
575 (define_insn "umulhisi3"
576 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
577 (mult:SI (zero_extend:SI
578 (match_operand:HI 1 "nvptx_register_operand" "R"))
579 (zero_extend:SI
580 (match_operand:HI 2 "nvptx_register_operand" "R"))))]
581 ""
582 "%.\\tmul.wide.u16\\t%0, %1, %2;")
583
;; Unsigned SI x SI -> DI.
584 (define_insn "umulsidi3"
585 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
586 (mult:DI (zero_extend:DI
587 (match_operand:SI 1 "nvptx_register_operand" "R"))
588 (zero_extend:DI
589 (match_operand:SI 2 "nvptx_register_operand" "R"))))]
590 ""
591 "%.\\tmul.wide.u32\\t%0, %1, %2;")
592
;; High-part multiplies.  Each pattern matches the upper half of a widening
;; product: the extended inputs are multiplied in the double-width mode, the
;; product is shifted right logically by the narrow width (16 or 32) and
;; truncated back.  Emitted as mul.hi.

;; Signed, HImode.
593 (define_insn "smulhi3_highpart"
594 [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
595 (truncate:HI
596 (lshiftrt:SI
597 (mult:SI (sign_extend:SI
598 (match_operand:HI 1 "nvptx_register_operand" "R"))
599 (sign_extend:SI
600 (match_operand:HI 2 "nvptx_register_operand" "R")))
601 (const_int 16))))]
602 ""
603 "%.\\tmul.hi.s16\\t%0, %1, %2;")
604
;; Signed, SImode.
605 (define_insn "smulsi3_highpart"
606 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
607 (truncate:SI
608 (lshiftrt:DI
609 (mult:DI (sign_extend:DI
610 (match_operand:SI 1 "nvptx_register_operand" "R"))
611 (sign_extend:DI
612 (match_operand:SI 2 "nvptx_register_operand" "R")))
613 (const_int 32))))]
614 ""
615 "%.\\tmul.hi.s32\\t%0, %1, %2;")
616
;; Unsigned, HImode.
617 (define_insn "umulhi3_highpart"
618 [(set (match_operand:HI 0 "nvptx_register_operand" "=R")
619 (truncate:HI
620 (lshiftrt:SI
621 (mult:SI (zero_extend:SI
622 (match_operand:HI 1 "nvptx_register_operand" "R"))
623 (zero_extend:SI
624 (match_operand:HI 2 "nvptx_register_operand" "R")))
625 (const_int 16))))]
626 ""
627 "%.\\tmul.hi.u16\\t%0, %1, %2;")
628
;; Unsigned, SImode.
629 (define_insn "umulsi3_highpart"
630 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
631 (truncate:SI
632 (lshiftrt:DI
633 (mult:DI (zero_extend:DI
634 (match_operand:SI 1 "nvptx_register_operand" "R"))
635 (zero_extend:DI
636 (match_operand:SI 2 "nvptx_register_operand" "R")))
637 (const_int 32))))]
638 ""
639 "%.\\tmul.hi.u32\\t%0, %1, %2;")
640
641 ;; Shifts
642
;; Shifts for HI/SI/DI.  The shift count (operand 2) is always SImode,
;; whatever the mode of the shifted value, and may be a register or an
;; immediate.

;; Left shift (shl.b<bits>).
643 (define_insn "ashl<mode>3"
644 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
645 (ashift:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
646 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
647 ""
648 "%.\\tshl.b%T0\\t%0, %1, %2;")
649
;; Arithmetic right shift (shr.s<bits>).
650 (define_insn "ashr<mode>3"
651 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
652 (ashiftrt:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
653 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
654 ""
655 "%.\\tshr.s%T0\\t%0, %1, %2;")
656
;; Logical right shift (shr.u<bits>).
657 (define_insn "lshr<mode>3"
658 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
659 (lshiftrt:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
660 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
661 ""
662 "%.\\tshr.u%T0\\t%0, %1, %2;")
663
664 ;; Logical operations
665
;; Bitwise AND, OR and XOR.  Unlike the arithmetic patterns these also cover
;; BImode (iterator BHSDIM), so they operate on predicate registers as well.
;; Operand 2 may be a register or an immediate.
666 (define_insn "and<mode>3"
667 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
668 (and:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
669 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
670 ""
671 "%.\\tand.b%T0\\t%0, %1, %2;")
672
673 (define_insn "ior<mode>3"
674 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
675 (ior:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
676 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
677 ""
678 "%.\\tor.b%T0\\t%0, %1, %2;")
679
680 (define_insn "xor<mode>3"
681 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
682 (xor:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
683 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
684 ""
685 "%.\\txor.b%T0\\t%0, %1, %2;")
686
687 ;; Comparisons and branches
688
;; Compare two HI/SI/DI values and write the outcome to a BImode register
;; with setp.  The comparison code is operand 1.
689 (define_insn "*cmp<mode>"
690 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
691 (match_operator:BI 1 "nvptx_comparison_operator"
692 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
693 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
694 ""
695 "%.\\tsetp%c1\\t%0, %2, %3;")
696
;; The SF/DF counterpart, accepting the floating-point comparison codes and
;; a floating constant as second input.
697 (define_insn "*cmp<mode>"
698 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
699 (match_operator:BI 1 "nvptx_float_comparison_operator"
700 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
701 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
702 ""
703 "%.\\tsetp%c1\\t%0, %2, %3;")
704
;; Unconditional branch to the label in operand 0, emitted as bra.
705 (define_insn "jump"
706 [(set (pc)
707 (label_ref (match_operand 0 "" "")))]
708 ""
709 "%.\\tbra\\t%l0;")
710
;; Conditional branches on a BImode register.  Both are marked as not
;; predicable; the guard comes from the %j0 / %J0 prefix of the template
;; rather than from the "%." prefix used by the other patterns.

;; Branch to operand 1 when operand 0 is non-zero.
711 (define_insn "br_true"
712 [(set (pc)
713 (if_then_else (ne (match_operand:BI 0 "nvptx_register_operand" "R")
714 (const_int 0))
715 (label_ref (match_operand 1 "" ""))
716 (pc)))]
717 ""
718 "%j0\\tbra\\t%l1;"
719 [(set_attr "predicable" "false")])
720
;; Branch to operand 1 when operand 0 is zero.
721 (define_insn "br_false"
722 [(set (pc)
723 (if_then_else (eq (match_operand:BI 0 "nvptx_register_operand" "R")
724 (const_int 0))
725 (label_ref (match_operand 1 "" ""))
726 (pc)))]
727 ""
728 "%J0\\tbra\\t%l1;"
729 [(set_attr "predicable" "false")])
730
731 ;; unified conditional branch
;; Same shape as br_true / br_false, except that the tested register is
;; wrapped in UNSPEC_BR_UNIFIED and the mnemonic is bra.uni.
732 (define_insn "br_true_uni"
733 [(set (pc) (if_then_else
734 (ne (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
735 UNSPEC_BR_UNIFIED) (const_int 0))
736 (label_ref (match_operand 1 "" "")) (pc)))]
737 ""
738 "%j0\\tbra.uni\\t%l1;"
739 [(set_attr "predicable" "false")])
740
741 (define_insn "br_false_uni"
742 [(set (pc) (if_then_else
743 (eq (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
744 UNSPEC_BR_UNIFIED) (const_int 0))
745 (label_ref (match_operand 1 "" "")) (pc)))]
746 ""
747 "%J0\\tbra.uni\\t%l1;"
748 [(set_attr "predicable" "false")])
749
;; Compare-and-branch expander for HI/SI/DI.  The comparison is handed to
;; nvptx_expand_compare (defined outside this file); the rtx it returns
;; becomes the branch condition, and that rtx's two sub-expressions replace
;; operands 1 and 2.
(define_expand "cbranch<mode>4"
  [(set (pc)
        (if_then_else (match_operator 0 "nvptx_comparison_operator"
                        [(match_operand:HSDIM 1 "nvptx_register_operand" "")
                         (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "")])
                      (label_ref (match_operand 3 "" ""))
                      (pc)))]
  ""
{
  operands[0] = nvptx_expand_compare (operands[0]);
  operands[1] = XEXP (operands[0], 0);
  operands[2] = XEXP (operands[0], 1);
})
764
;; Compare-and-branch expander for SF/DF.  The comparison is handed to
;; nvptx_expand_compare (defined outside this file); the rtx it returns
;; becomes the branch condition, and that rtx's two sub-expressions replace
;; operands 1 and 2.
(define_expand "cbranch<mode>4"
  [(set (pc)
        (if_then_else (match_operator 0 "nvptx_float_comparison_operator"
                        [(match_operand:SDFM 1 "nvptx_register_operand" "")
                         (match_operand:SDFM 2 "nvptx_nonmemory_operand" "")])
                      (label_ref (match_operand 3 "" ""))
                      (pc)))]
  ""
{
  operands[0] = nvptx_expand_compare (operands[0]);
  operands[1] = XEXP (operands[0], 0);
  operands[2] = XEXP (operands[0], 1);
})
779
;; Compare-and-branch on a BImode register: only EQ/NE against constant zero
;; is accepted.  The preparation code is empty, so the template is emitted
;; as written (it has the shape matched by br_true / br_false).
780 (define_expand "cbranchbi4"
781 [(set (pc)
782 (if_then_else (match_operator 0 "predicate_operator"
783 [(match_operand:BI 1 "nvptx_register_operand" "")
784 (match_operand:BI 2 "const0_operand" "")])
785 (label_ref (match_operand 3 "" ""))
786 (pc)))]
787 ""
788 "")
789
790 ;; Conditional stores
791
;; Turn a BImode predicate into an SImode value with selp: -1 is selected
;; when the predicate is set, 0 otherwise.
;; NOTE(review): this template separates mnemonic and operands with a space,
;; where every other template in this section uses \\t.
792 (define_insn "setcc_from_bi"
793 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
794 (ne:SI (match_operand:BI 1 "nvptx_register_operand" "R")
795 (const_int 0)))]
796 ""
797 "%.\\tselp%t0 %0,-1,0,%1;")
798
;; Select on a predicate: operand 0 = (operand 1 != 0) ? operand 2
;; : operand 3, emitted as selp with the values in source order.
;; Integer (HI/SI/DI) variant.
799 (define_insn "sel_true<mode>"
800 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
801 (if_then_else:HSDIM
802 (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
803 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
804 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
805 ""
806 "%.\\tselp%t0\\t%0, %2, %3, %1;")
807
;; Floating-point (SF/DF) variant.
808 (define_insn "sel_true<mode>"
809 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
810 (if_then_else:SDFM
811 (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
812 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
813 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
814 ""
815 "%.\\tselp%t0\\t%0, %2, %3, %1;")
816
;; Select on an inverted predicate: operand 0 = (operand 1 == 0) ? operand 2
;; : operand 3.  The template lists %3 before %2, so the same selp
;; instruction serves with the value operands swapped.
;; Integer (HI/SI/DI) variant.
817 (define_insn "sel_false<mode>"
818 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
819 (if_then_else:HSDIM
820 (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
821 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
822 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
823 ""
824 "%.\\tselp%t0\\t%0, %3, %2, %1;")
825
;; Floating-point (SF/DF) variant.
826 (define_insn "sel_false<mode>"
827 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
828 (if_then_else:SDFM
829 (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
830 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
831 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
832 ""
833 "%.\\tselp%t0\\t%0, %3, %2, %1;")
834
;; Store the outcome of a comparison directly in an SImode register with
;; set.  Integer (HI/SI/DI) inputs.
835 (define_insn "setcc_int<mode>"
836 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
837 (match_operator:SI 1 "nvptx_comparison_operator"
838 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
839 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
840 ""
841 "%.\\tset%t0%c1\\t%0, %2, %3;")
842
;; Floating-point (SF/DF) inputs, still with an SImode result.
843 (define_insn "setcc_int<mode>"
844 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
845 (match_operator:SI 1 "nvptx_float_comparison_operator"
846 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
847 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
848 ""
849 "%.\\tset%t0%c1\\t%0, %2, %3;")
850
;; As setcc_int<mode>, but the comparison outcome is stored in an SFmode
;; register.  Integer (HI/SI/DI) inputs.
851 (define_insn "setcc_float<mode>"
852 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
853 (match_operator:SF 1 "nvptx_comparison_operator"
854 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
855 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
856 ""
857 "%.\\tset%t0%c1\\t%0, %2, %3;")
858
;; Floating-point (SF/DF) inputs.
859 (define_insn "setcc_float<mode>"
860 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
861 (match_operator:SF 1 "nvptx_float_comparison_operator"
862 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
863 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
864 ""
865 "%.\\tset%t0%c1\\t%0, %2, %3;")
866
;; Store-flag expanders.  None has preparation code, so each emits its
;; template unchanged; the result is always SImode.

;; BImode input: only "register != 0" is accepted (ne_operator with
;; const0_operand), which is the shape matched by setcc_from_bi.
867 (define_expand "cstorebi4"
868 [(set (match_operand:SI 0 "nvptx_register_operand")
869 (match_operator:SI 1 "ne_operator"
870 [(match_operand:BI 2 "nvptx_register_operand")
871 (match_operand:BI 3 "const0_operand")]))]
872 ""
873 "")
874
;; HI/SI/DI inputs; the shape matched by the integer setcc_int<mode>.
875 (define_expand "cstore<mode>4"
876 [(set (match_operand:SI 0 "nvptx_register_operand")
877 (match_operator:SI 1 "nvptx_comparison_operator"
878 [(match_operand:HSDIM 2 "nvptx_register_operand")
879 (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))]
880 ""
881 "")
882
;; SF/DF inputs; the shape matched by the floating-point setcc_int<mode>.
883 (define_expand "cstore<mode>4"
884 [(set (match_operand:SI 0 "nvptx_register_operand")
885 (match_operator:SI 1 "nvptx_float_comparison_operator"
886 [(match_operand:SDFM 2 "nvptx_register_operand")
887 (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))]
888 ""
889 "")
890
891 ;; Calls
892
;; Call without a return value.  The whole insn is a PARALLEL validated by
;; call_operation: element 0 is the CALL, and the rest are USEs of
;; registers.  Operand 0 is the callee (a register or a function symbol, in
;; pointer mode).  The text is produced by nvptx_output_call_insn, with
;; NULL_RTX passed in the result position.
893 (define_insn "call_insn_<mode>"
894 [(match_parallel 2 "call_operation"
895 [(call (mem:QI (match_operand:P 0 "call_insn_operand" "Rs"))
896 (match_operand 1))])]
897 ""
898 {
899 return nvptx_output_call_insn (insn, NULL_RTX, operands[0]);
900 })
901
;; Call with a return value.  Same PARALLEL shape as call_insn_<mode>,
;; except that element 0 is a SET of the result register (operand 0) from
;; the CALL.  Operand 1 is the callee.  The text is produced by
;; nvptx_output_call_insn, which receives both the result and the callee.
902 (define_insn "call_value_insn_<mode>"
903 [(match_parallel 3 "call_operation"
904 [(set (match_operand 0 "nvptx_register_operand" "=R")
905 (call (mem:QI (match_operand:P 1 "call_insn_operand" "Rs"))
906 (match_operand 2)))])]
907 ""
908 {
909 return nvptx_output_call_insn (insn, operands[0], operands[1]);
910 })
911
;; Expander for a call whose value is unused.  All work is delegated to
;; nvptx_expand_call (defined outside this file), which receives NULL_RTX
;; as its first argument and operand 0 as its second; the template itself
;; is never emitted (DONE).
912 (define_expand "call"
913 [(match_operand 0 "" "")]
914 ""
915 {
916 nvptx_expand_call (NULL_RTX, operands[0]);
917 DONE;
918 })
919
;; Expander for a call whose value is used.  Operands 0 and 1 are passed,
;; in that order, to nvptx_expand_call (defined outside this file); the
;; template itself is never emitted (DONE).
920 (define_expand "call_value"
921 [(match_operand 0 "" "")
922 (match_operand 1 "" "")]
923 ""
924 {
925 nvptx_expand_call (operands[0], operands[1]);
926 DONE;
927 })
928
929 ;; Floating point arithmetic.
930
;; SF/DF addition.  Operand 2 may be a register or a floating constant.
931 (define_insn "add<mode>3"
932 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
933 (plus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
934 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
935 ""
936 "%.\\tadd%t0\\t%0, %1, %2;")
937
;; SF/DF subtraction.  Both inputs must be registers.
938 (define_insn "sub<mode>3"
939 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
940 (minus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
941 (match_operand:SDFM 2 "nvptx_register_operand" "R")))]
942 ""
943 "%.\\tsub%t0\\t%0, %1, %2;")
944
;; SF/DF multiplication.
945 (define_insn "mul<mode>3"
946 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
947 (mult:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
948 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
949 ""
950 "%.\\tmul%t0\\t%0, %1, %2;")
951
;; Fused multiply-add, op1 * op2 + op3.  The template carries an extra "%#"
;; escape between the mnemonic and the type suffix, as do the rcp, div and
;; sqrt templates; its expansion is defined outside this file.
952 (define_insn "fma<mode>4"
953 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
954 (fma:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
955 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
956 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
957 ""
958 "%.\\tfma%#%t0\\t%0, %1, %2, %3;")
959
;; Reciprocal: matches a division whose dividend (operand 2) is a
;; CONST_DOUBLE identical to dconst1 and emits rcp on the divisor
;; (operand 1).  Note the operand numbering: the constant is operand 2 even
;; though it appears first in the RTL.
960 (define_insn "*recip<mode>2"
961 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
962 (div:SDFM
963 (match_operand:SDFM 2 "const_double_operand" "F")
964 (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
965 "CONST_DOUBLE_P (operands[2])
966 && real_identical (CONST_DOUBLE_REAL_VALUE (operands[2]), &dconst1)"
967 "%.\\trcp%#%t0\\t%0, %1;")
968
;; SF/DF division.  The divisor may be a register or a floating constant.
969 (define_insn "div<mode>3"
970 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
971 (div:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
972 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
973 ""
974 "%.\\tdiv%#%t0\\t%0, %1, %2;")
975
;; copysign for SF/DF, represented as UNSPEC_COPYSIGN.  The template passes
;; the inputs in reverse order (%2 before %1); presumably the PTX copysign
;; instruction takes the sign source first -- confirm against the PTX ISA.
976 (define_insn "copysign<mode>3"
977 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
978 (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")
979 (match_operand:SDFM 2 "nvptx_register_operand" "R")]
980 UNSPEC_COPYSIGN))]
981 ""
982 "%.\\tcopysign%t0\\t%0, %2, %1;")
983
;; Minimum for every mode of the SDFM iterator: matches the RTL "smin"
;; code and emits a PTX "min".  Operand 2 may be any
;; nvptx_nonmemory_operand.
984 (define_insn "smin<mode>3"
985 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
986 (smin:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
987 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
988 ""
989 "%.\\tmin%t0\\t%0, %1, %2;")
990
;; Maximum for every mode of the SDFM iterator: matches the RTL "smax"
;; code and emits a PTX "max".  Operand 2 may be any
;; nvptx_nonmemory_operand.
991 (define_insn "smax<mode>3"
992 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
993 (smax:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
994 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
995 ""
996 "%.\\tmax%t0\\t%0, %1, %2;")
997
;; Absolute value for every mode of the SDFM iterator; register to
;; register, emitted as a PTX "abs".
998 (define_insn "abs<mode>2"
999 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1000 (abs:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1001 ""
1002 "%.\\tabs%t0\\t%0, %1;")
1003
;; Negation for every mode of the SDFM iterator; register to register,
;; emitted as a PTX "neg".
1004 (define_insn "neg<mode>2"
1005 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1006 (neg:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1007 ""
1008 "%.\\tneg%t0\\t%0, %1;")
1009
;; Square root for every mode of the SDFM iterator; register to
;; register, emitted as a PTX "sqrt".  Unlike the approximate
;; transcendental patterns in this file, this insn has an empty
;; condition and is therefore always available.
1010 (define_insn "sqrt<mode>2"
1011 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1012 (sqrt:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1013 ""
1014 "%.\\tsqrt%#%t0\\t%0, %1;")
1015
;; Combined sine/cosine of SFmode operand 2, enabled only under
;; flag_unsafe_math_optimizations.  Expands into two sets: operand 0
;; receives the UNSPEC_COS of the input first, then operand 1 receives
;; the UNSPEC_SIN of the same input (match_dup 2).
;; The preparation statement replaces operand 2 with the result of
;; make_safe_from (operands[2], operands[0]).
;; NOTE(review): presumably so that the input is still intact for the
;; second set when it overlaps operand 0 -- confirm make_safe_from's
;; contract in expr.c.
1016 (define_expand "sincossf3"
1017 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1018 (unspec:SF [(match_operand:SF 2 "nvptx_register_operand" "R")]
1019 UNSPEC_COS))
1020 (set (match_operand:SF 1 "nvptx_register_operand" "=R")
1021 (unspec:SF [(match_dup 2)] UNSPEC_SIN))]
1022 "flag_unsafe_math_optimizations"
1023 {
1024 operands[2] = make_safe_from (operands[2], operands[0]);
1025 })
1026
;; SFmode sine, represented as UNSPEC_SIN and emitted as the PTX
;; "sin.approx" instruction.  Because the approximate form is used, the
;; pattern is only enabled under flag_unsafe_math_optimizations.
1027 (define_insn "sinsf2"
1028 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1029 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1030 UNSPEC_SIN))]
1031 "flag_unsafe_math_optimizations"
1032 "%.\\tsin.approx%t0\\t%0, %1;")
1033
;; SFmode cosine, represented as UNSPEC_COS and emitted as the PTX
;; "cos.approx" instruction.  Only enabled under
;; flag_unsafe_math_optimizations.
1034 (define_insn "cossf2"
1035 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1036 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1037 UNSPEC_COS))]
1038 "flag_unsafe_math_optimizations"
1039 "%.\\tcos.approx%t0\\t%0, %1;")
1040
;; SFmode base-2 logarithm, represented as UNSPEC_LOG2 and emitted as
;; the PTX "lg2.approx" instruction.  Only enabled under
;; flag_unsafe_math_optimizations.
1041 (define_insn "log2sf2"
1042 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1043 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1044 UNSPEC_LOG2))]
1045 "flag_unsafe_math_optimizations"
1046 "%.\\tlg2.approx%t0\\t%0, %1;")
1047
;; SFmode base-2 exponential, represented as UNSPEC_EXP2 and emitted as
;; the PTX "ex2.approx" instruction.  Only enabled under
;; flag_unsafe_math_optimizations.
1048 (define_insn "exp2sf2"
1049 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1050 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
1051 UNSPEC_EXP2))]
1052 "flag_unsafe_math_optimizations"
1053 "%.\\tex2.approx%t0\\t%0, %1;")
1054
1055 ;; Conversions involving floating point
1056
;; Widen SFmode operand 1 to DFmode operand 0 with a PTX "cvt".  The
;; template prints the "%t" suffix of the destination followed by that
;; of the source; no "%#" modifier is used for this widening conversion.
1057 (define_insn "extendsfdf2"
1058 [(set (match_operand:DF 0 "nvptx_register_operand" "=R")
1059 (float_extend:DF (match_operand:SF 1 "nvptx_register_operand" "R")))]
1060 ""
1061 "%.\\tcvt%t0%t1\\t%0, %1;")
1062
;; Narrow DFmode operand 1 to SFmode operand 0 with a PTX "cvt".  In
;; contrast to extendsfdf2, the template carries the "%#" modifier.
;; NOTE(review): "%#" is presumably the rounding-mode suffix needed for a
;; narrowing conversion -- confirm in the backend's operand printer.
1063 (define_insn "truncdfsf2"
1064 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
1065 (float_truncate:SF (match_operand:DF 1 "nvptx_register_operand" "R")))]
1066 ""
1067 "%.\\tcvt%#%t0%t1\\t%0, %1;")
1068
;; Convert unsigned SImode operand 1 to each SDFM mode with a PTX "cvt".
;; The source type is written as ".u" followed by the "%T1" expansion of
;; the integer operand.
1069 (define_insn "floatunssi<mode>2"
1070 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1071 (unsigned_float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
1072 ""
1073 "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
1074
;; Convert signed SImode operand 1 to each SDFM mode with a PTX "cvt".
;; The source type is written as ".s" followed by the "%T1" expansion of
;; the integer operand.
1075 (define_insn "floatsi<mode>2"
1076 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1077 (float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
1078 ""
1079 "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
1080
;; Convert unsigned DImode operand 1 to each SDFM mode with a PTX "cvt".
;; Same template as floatunssi<mode>2; only the source mode differs.
1081 (define_insn "floatunsdi<mode>2"
1082 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1083 (unsigned_float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
1084 ""
1085 "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
1086
;; Convert signed DImode operand 1 to each SDFM mode with a PTX "cvt".
;; Same template as floatsi<mode>2; only the source mode differs.
1087 (define_insn "floatdi<mode>2"
1088 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1089 (float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
1090 ""
1091 "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
1092
;; Convert each SDFM mode to unsigned SImode.  The template hard-codes
;; the ".rzi" qualifier on the PTX "cvt" and writes the destination type
;; as ".u" followed by the "%T0" expansion.
1093 (define_insn "fixuns_trunc<mode>si2"
1094 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1095 (unsigned_fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1096 ""
1097 "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
1098
;; Convert each SDFM mode to signed SImode.  The template hard-codes the
;; ".rzi" qualifier on the PTX "cvt" and writes the destination type as
;; ".s" followed by the "%T0" expansion.
1099 (define_insn "fix_trunc<mode>si2"
1100 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1101 (fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1102 ""
1103 "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
1104
;; Convert each SDFM mode to unsigned DImode.  Same template as
;; fixuns_trunc<mode>si2; only the destination mode differs.
1105 (define_insn "fixuns_trunc<mode>di2"
1106 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
1107 (unsigned_fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1108 ""
1109 "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
1110
;; Convert each SDFM mode to signed DImode.  Same template as
;; fix_trunc<mode>si2; only the destination mode differs.
1111 (define_insn "fix_trunc<mode>di2"
1112 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
1113 (fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1114 ""
1115 "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
1116
;; Round-to-integral patterns that keep the floating-point mode.
;; FPINT iterates over four unspecs; fpint_name supplies the standard
;; pattern name for each (floor, btrunc, ceil, nearbyint) and
;; fpint_roundingmode the qualifier spliced into the "cvt" template:
;; ".rmi" for floor, ".rzi" for btrunc, ".rpi" for ceil.  For nearbyint
;; the attribute is "%#i", i.e. the "%#" modifier followed by "i".
;; NOTE(review): "%#" is resolved by the backend's operand printer
;; (not visible here) -- confirm what it prints.
1117 (define_int_iterator FPINT [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_BTRUNC
1118 UNSPEC_FPINT_CEIL UNSPEC_FPINT_NEARBYINT])
1119 (define_int_attr fpint_name [(UNSPEC_FPINT_FLOOR "floor")
1120 (UNSPEC_FPINT_BTRUNC "btrunc")
1121 (UNSPEC_FPINT_CEIL "ceil")
1122 (UNSPEC_FPINT_NEARBYINT "nearbyint")])
1123 (define_int_attr fpint_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
1124 (UNSPEC_FPINT_BTRUNC ".rzi")
1125 (UNSPEC_FPINT_CEIL ".rpi")
1126 (UNSPEC_FPINT_NEARBYINT "%#i")])
1127
;; One insn per (FPINT, SDFM) pair: source and destination have the same
;; mode, so both "%t0" and "%t1" appear in the cvt type list.
1128 (define_insn "<FPINT:fpint_name><SDFM:mode>2"
1129 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1130 (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
1131 FPINT))]
1132 ""
1133 "%.\\tcvt<FPINT:fpint_roundingmode>%t0%t1\\t%0, %1;")
1134
;; Round-and-convert patterns producing an integer result.
;; FPINT2 covers only floor and ceil; fpint2_name maps them to the
;; standard names lfloor / lceil and fpint2_roundingmode to the cvt
;; qualifiers ".rmi" / ".rpi".
1135 (define_int_iterator FPINT2 [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_CEIL])
1136 (define_int_attr fpint2_name [(UNSPEC_FPINT_FLOOR "lfloor")
1137 (UNSPEC_FPINT_CEIL "lceil")])
1138 (define_int_attr fpint2_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
1139 (UNSPEC_FPINT_CEIL ".rpi")])
1140
;; One insn per (FPINT2, SDFM source mode, SDIM destination mode)
;; combination.  The destination type is always written as signed
;; (".s" followed by the "%T0" expansion).
1141 (define_insn "<FPINT2:fpint2_name><SDFM:mode><SDIM:mode>2"
1142 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1143 (unspec:SDIM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
1144 FPINT2))]
1145 ""
1146 "%.\\tcvt<FPINT2:fpint2_roundingmode>.s%T0%t1\\t%0, %1;")
1147
1148 ;; Vector operations
1149
;; Insert scalar operand 1 into element 0 of vector operand 0.
;; Matches a vec_merge of the broadcast scalar with the old vector value
;; (match_dup 0) under mask 1 (bit 0), and emits a "mov" to the ".x"
;; component of the vector register.
1150 (define_insn "*vec_set<mode>_0"
1151 [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1152 (vec_merge:VECIM
1153 (vec_duplicate:VECIM
1154 (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1155 (match_dup 0)
1156 (const_int 1)))]
1157 ""
1158 "%.\\tmov%t1\\t%0.x, %1;")
1159
;; Insert scalar operand 1 into element 1 of vector operand 0.
;; Same shape as *vec_set<mode>_0 but with merge mask 2 (bit 1); emits a
;; "mov" to the ".y" component.
1160 (define_insn "*vec_set<mode>_1"
1161 [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1162 (vec_merge:VECIM
1163 (vec_duplicate:VECIM
1164 (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1165 (match_dup 0)
1166 (const_int 2)))]
1167 ""
1168 "%.\\tmov%t1\\t%0.y, %1;")
1169
;; Insert scalar operand 1 into element 2 of vector operand 0.
;; Same shape as *vec_set<mode>_0 but with merge mask 4 (bit 2); emits a
;; "mov" to the ".z" component.
1170 (define_insn "*vec_set<mode>_2"
1171 [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1172 (vec_merge:VECIM
1173 (vec_duplicate:VECIM
1174 (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1175 (match_dup 0)
1176 (const_int 4)))]
1177 ""
1178 "%.\\tmov%t1\\t%0.z, %1;")
1179
;; Insert scalar operand 1 into element 3 of vector operand 0.
;; Same shape as *vec_set<mode>_0 but with merge mask 8 (bit 3); emits a
;; "mov" to the ".w" component.
1180 (define_insn "*vec_set<mode>_3"
1181 [(set (match_operand:VECIM 0 "nvptx_register_operand" "=R")
1182 (vec_merge:VECIM
1183 (vec_duplicate:VECIM
1184 (match_operand:<VECELEM> 1 "nvptx_register_operand" "R"))
1185 (match_dup 0)
1186 (const_int 8)))]
1187 ""
1188 "%.\\tmov%t1\\t%0.w, %1;")
1189
;; Expander for the standard "vec_set<mode>" name: store scalar operand 1
;; into the element of vector operand 0 selected by constant index
;; operand 2.  The store is built as a vec_merge of the broadcast scalar
;; with the old vector under a one-hot mask (1 << index), which is the
;; shape recognized by the *vec_set<mode>_N insns.
(define_expand "vec_set<mode>"
  [(match_operand:VECIM 0 "nvptx_register_operand")
   (match_operand:<VECELEM> 1 "nvptx_register_operand")
   (match_operand:SI 2 "nvptx_vector_index_operand")]
  ""
{
  enum machine_mode vmode = GET_MODE (operands[0]);
  /* Bit N set: element N is taken from the broadcast scalar, every
     other element keeps its previous value.  */
  int lane_bit = 1 << INTVAL (operands[2]);
  rtx splat = gen_rtx_VEC_DUPLICATE (vmode, operands[1]);
  rtx merged = gen_rtx_VEC_MERGE (vmode, splat, operands[0],
                                  GEN_INT (lane_bit));
  emit_insn (gen_rtx_SET (operands[0], merged));
  DONE;
})
1203
;; Extract one element of vector operand 1 into scalar operand 0.
;; Operand 2 is the constant element index (nvptx_vector_index_operand);
;; it selects which component suffix (.x/.y/.z/.w) the emitted "mov"
;; reads from.
(define_insn "vec_extract<mode><Vecelem>"
  [(set (match_operand:<VECELEM> 0 "nvptx_register_operand" "=R")
        (vec_select:<VECELEM>
          (match_operand:VECIM 1 "nvptx_register_operand" "R")
          (parallel [(match_operand:SI 2 "nvptx_vector_index_operand" "")])))]
  ""
{
  /* One template per vector component, indexed by element number.  */
  static const char *const component_mov[4] = {
    "%.\\tmov%t0\\t%0, %1.x;",
    "%.\\tmov%t0\\t%0, %1.y;",
    "%.\\tmov%t0\\t%0, %1.z;",
    "%.\\tmov%t0\\t%0, %1.w;"
  };
  const HOST_WIDE_INT element = INTVAL (operands[2]);
  return component_mov[element];
})
1219
1220 ;; Miscellaneous
1221
;; No-operation insn: its pattern is (const_int 0) and its output
;; template is empty, so nothing is written to the assembly output.
1222 (define_insn "nop"
1223 [(const_int 0)]
1224 ""
1225 "")
1226
;; Emits the PTX "exit;" statement.  The pattern is the bare constant
;; (const_int 1), which distinguishes it from "nop" (const_int 0) and
;; "fake_nop" (const_int 2).
1227 (define_insn "exit"
1228 [(const_int 1)]
1229 ""
1230 "exit;")
1231
;; A "nop" that still produces code: emits a braced PTX block that
;; declares two local .u32 registers (%nop_src, %nop_dst) and moves one
;; into the other.  The "%%" sequences are template escapes for a literal
;; "%".
;; NOTE(review): the reason a real instruction is needed instead of the
;; empty "nop" is not visible in this chunk -- see the users in nvptx.c.
1232 (define_insn "fake_nop"
1233 [(const_int 2)]
1234 ""
1235 "{
1236 .reg .u32 %%nop_src;
1237 .reg .u32 %%nop_dst;
1238 mov.u32 %%nop_dst, %%nop_src;
1239 }")
1240
;; Function return.  The assembly text is not a fixed template; it is
;; produced by nvptx_output_return ().  The insn is marked as not
;; predicable.
1241 (define_insn "return"
1242 [(return)]
1243 ""
1244 {
1245 return nvptx_output_return ();
1246 }
1247 [(set_attr "predicable" "false")])
1248
;; Function epilogue.  With TARGET_SOFT_STACK, first emit a
;; set_softstack insn on the Pmode register SOFTSTACK_PREV_REGNUM, then
;; emit the return jump.  Without it, only the return is emitted.  The
;; (clobber (const_int 0)) pattern is a placeholder that is never
;; emitted, because the expander always finishes with DONE.
(define_expand "epilogue"
  [(clobber (const_int 0))]
  ""
{
  if (TARGET_SOFT_STACK)
    {
      rtx prev_stack = gen_rtx_REG (Pmode, SOFTSTACK_PREV_REGNUM);
      emit_insn (gen_set_softstack (Pmode, prev_stack));
    }
  emit_jump_insn (gen_return ());
  DONE;
})
1259
;; Nonlocal goto is not supported on this target.  The expander reports
;; that through sorry () and emits a single nop so that expansion still
;; yields an insn; all four operands are ignored.
1260 (define_expand "nonlocal_goto"
1261 [(match_operand 0 "" "")
1262 (match_operand 1 "" "")
1263 (match_operand 2 "" "")
1264 (match_operand 3 "" "")]
1265 ""
1266 {
1267 sorry ("target cannot support nonlocal goto.");
1268 emit_insn (gen_nop ());
1269 DONE;
1270 })
1271
;; Receiver side of a nonlocal goto: likewise unsupported, so the
;; expander only issues the sorry () diagnostic.  There is no DONE here,
;; so after the preparation statement the (const_int 0) pattern itself is
;; emitted.
1272 (define_expand "nonlocal_goto_receiver"
1273 [(const_int 0)]
1274 ""
1275 {
1276 sorry ("target cannot support nonlocal goto.");
1277 })
1278
;; Dynamic stack allocation (alloca).  Operand 1 is the number of bytes
;; requested and operand 0 receives the address of the new block.
;; Only the soft-stack configuration can implement this: the stack
;; pointer is lowered by operand 1, the new value is published through
;; set_softstack, and operand 0 is loaded from virtual_stack_dynamic_rtx.
;; Otherwise a sorry () diagnostic is issued and a nop is emitted.
(define_expand "allocate_stack"
  [(match_operand 0 "nvptx_register_operand")
   (match_operand 1 "nvptx_register_operand")]
  ""
{
  if (!TARGET_SOFT_STACK)
    {
      /* The ptx documentation specifies an alloca intrinsic (for 32 bit
         only) but notes it is not implemented.  The assembler emits a
         confused error message.  Issue a blunt one now instead.  */
      sorry ("target cannot support alloca.");
      emit_insn (gen_nop ());
      DONE;
    }

  rtx lowered_sp = gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1]);
  emit_move_insn (stack_pointer_rtx, lowered_sp);
  emit_insn (gen_set_softstack (Pmode, stack_pointer_rtx));
  emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
  DONE;
})
1299
;; Soft-stack update, one insn per pointer mode of the P iterator; only
;; available with TARGET_SOFT_STACK.  The "@" prefix makes the generator
;; function take the mode as its first argument, which is how callers in
;; this file invoke it: gen_set_softstack (Pmode, reg).
;; The assembly is produced by nvptx_output_set_softstack () from the
;; hard register number of operand 0.
1300 (define_insn "@set_softstack_<mode>"
1301 [(unspec [(match_operand:P 0 "nvptx_register_operand" "R")]
1302 UNSPEC_SET_SOFTSTACK)]
1303 "TARGET_SOFT_STACK"
1304 {
1305 return nvptx_output_set_softstack (REGNO (operands[0]));
1306 })
1307
;; Restore the stack pointer at the end of a block.  With
;; TARGET_SOFT_STACK, operand 1 is copied into operand 0 and the new
;; value is published through set_softstack.  Without soft stacks the
;; expander emits nothing at all (DONE is reached unconditionally).
1308 (define_expand "restore_stack_block"
1309 [(match_operand 0 "register_operand" "")
1310 (match_operand 1 "register_operand" "")]
1311 ""
1312 {
1313 if (TARGET_SOFT_STACK)
1314 {
1315 emit_move_insn (operands[0], operands[1]);
1316 emit_insn (gen_set_softstack (Pmode, operands[0]));
1317 }
1318 DONE;
1319 })
1320
;; Function-level stack restore: deliberately expands to no insns in
;; every configuration (the body is just DONE); both operands are
;; ignored.
1321 (define_expand "restore_stack_function"
1322 [(match_operand 0 "register_operand" "")
1323 (match_operand 1 "register_operand" "")]
1324 ""
1325 {
1326 DONE;
1327 })
1328
;; Unconditional trap: matches (trap_if (const_int 1) ...) and emits a
;; PTX "trap;" immediately followed by "exit;".
1329 (define_insn "trap"
1330 [(trap_if (const_int 1) (const_int 0))]
1331 ""
1332 "trap; exit;")
1333
;; Conditional trap taken when BImode operand 0 is non-zero.  Both the
;; "trap" and the "exit" are individually prefixed with "%j0".
;; NOTE(review): "%j0" presumably prints operand 0 as a PTX guard
;; predicate ("@%p") -- confirm in the backend's operand printer.
;; The insn supplies its own guard, so it is marked not predicable.
1334 (define_insn "trap_if_true"
1335 [(trap_if (ne (match_operand:BI 0 "nvptx_register_operand" "R")
1336 (const_int 0))
1337 (const_int 0))]
1338 ""
1339 "%j0 trap; %j0 exit;"
1340 [(set_attr "predicable" "false")])
1341
;; Conditional trap taken when BImode operand 0 is zero; the mirror image
;; of trap_if_true, using the upper-case "%J0" modifier on both
;; statements.
;; NOTE(review): "%J0" presumably prints the negated guard predicate
;; ("@!%p") -- confirm in the backend's operand printer.
1342 (define_insn "trap_if_false"
1343 [(trap_if (eq (match_operand:BI 0 "nvptx_register_operand" "R")
1344 (const_int 0))
1345 (const_int 0))]
1346 ""
1347 "%J0 trap; %J0 exit;"
1348 [(set_attr "predicable" "false")])
1349
;; Conditional trap on an integer comparison (standard "ctrap<mode>4").
;; Operand 0 is the comparison of operands 1 and 2; operand 3 must be
;; zero.  nvptx_expand_compare () turns the comparison into an rtx that
;; is then handed to trap_if_true as its condition.
(define_expand "ctrap<mode>4"
  [(trap_if (match_operator 0 "nvptx_comparison_operator"
              [(match_operand:SDIM 1 "nvptx_register_operand")
               (match_operand:SDIM 2 "nvptx_nonmemory_operand")])
            (match_operand 3 "const0_operand"))]
  ""
{
  emit_insn (gen_trap_if_true (nvptx_expand_compare (operands[0])));
  DONE;
})
1361
;; Read the size of an OpenACC parallel dimension into SImode operand 0.
;; Operand 1 is a constant selecting the dimension; it indexes the
;; template table below (0 = gang, 1 = worker, 2 = vector), which reads
;; %nctaid.x, %ntid.y and %ntid.x respectively.
;; The index is checked against the table size so that an unexpected
;; dimension number yields an internal compiler error rather than an
;; out-of-bounds read of the template table.
(define_insn "oacc_dim_size"
  [(set (match_operand:SI 0 "nvptx_register_operand" "")
        (unspec:SI [(match_operand:SI 1 "const_int_operand" "")]
                   UNSPEC_DIM_SIZE))]
  ""
{
  static const char *const asms[] =
    { /* Must match oacc_loop_levels ordering.  */
      "%.\\tmov.u32\\t%0, %%nctaid.x;", /* gang */
      "%.\\tmov.u32\\t%0, %%ntid.y;", /* worker */
      "%.\\tmov.u32\\t%0, %%ntid.x;", /* vector */
    };
  /* Unsigned on purpose: a negative constant becomes a huge value and
     fails the range check too.  */
  unsigned HOST_WIDE_INT axis = UINTVAL (operands[1]);
  gcc_assert (axis < ARRAY_SIZE (asms));
  return asms[axis];
})
1376
;; Read this thread's position within an OpenACC parallel dimension into
;; SImode operand 0.  Operand 1 is a constant selecting the dimension;
;; it indexes the template table below (0 = gang, 1 = worker,
;; 2 = vector), which reads %ctaid.x, %tid.y and %tid.x respectively.
;; The source is an unspec_volatile (UNSPECV_DIM_POS), unlike the plain
;; unspec used by oacc_dim_size.
;; The index is checked against the table size so that an unexpected
;; dimension number yields an internal compiler error rather than an
;; out-of-bounds read of the template table.
(define_insn "oacc_dim_pos"
  [(set (match_operand:SI 0 "nvptx_register_operand" "")
        (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")]
                            UNSPECV_DIM_POS))]
  ""
{
  static const char *const asms[] =
    { /* Must match oacc_loop_levels ordering.  */
      "%.\\tmov.u32\\t%0, %%ctaid.x;", /* gang */
      "%.\\tmov.u32\\t%0, %%tid.y;", /* worker */
      "%.\\tmov.u32\\t%0, %%tid.x;", /* vector */
    };
  /* Unsigned on purpose: a negative constant becomes a huge value and
     fails the range check too.  */
  unsigned HOST_WIDE_INT axis = UINTVAL (operands[1]);
  gcc_assert (axis < ARRAY_SIZE (asms));
  return asms[axis];
})
1391
;; Marker insn (UNSPECV_FORK) carrying a constant operand.  It generates
;; no instruction: the template only writes the PTX comment
;; "// fork <operand 0>;".  Volatile and not predicable.
1392 (define_insn "nvptx_fork"
1393 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1394 UNSPECV_FORK)]
1395 ""
1396 "// fork %0;"
1397 [(set_attr "predicable" "false")])
1398
;; Marker insn (UNSPECV_FORKED) carrying a constant operand.  It
;; generates no instruction: the template only writes the PTX comment
;; "// forked <operand 0>;".  Volatile and not predicable.
1399 (define_insn "nvptx_forked"
1400 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1401 UNSPECV_FORKED)]
1402 ""
1403 "// forked %0;"
1404 [(set_attr "predicable" "false")])
1405
;; Marker insn (UNSPECV_JOINING) carrying a constant operand.  It
;; generates no instruction: the template only writes the PTX comment
;; "// joining <operand 0>;".  Volatile and not predicable.
1406 (define_insn "nvptx_joining"
1407 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1408 UNSPECV_JOINING)]
1409 ""
1410 "// joining %0;"
1411 [(set_attr "predicable" "false")])
1412
;; Marker insn (UNSPECV_JOIN) carrying a constant operand.  It generates
;; no instruction: the template only writes the PTX comment
;; "// join <operand 0>;".  Volatile and not predicable.
1413 (define_insn "nvptx_join"
1414 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1415 UNSPECV_JOIN)]
1416 ""
1417 "// join %0;"
1418 [(set_attr "predicable" "false")])
1419
;; OpenACC fork.  Unless operand 0 is the constant zero, operand 1 is
;; first copied into operand 0; the fork sequence itself is then produced
;; by nvptx_expand_oacc_fork () from the constant in operand 2.  The RTL
;; template is never emitted because the expander ends with DONE.
1420 (define_expand "oacc_fork"
1421 [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
1422 (match_operand:SI 1 "general_operand" ""))
1423 (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
1424 UNSPECV_FORKED)]
1425 ""
1426 {
1427 if (operands[0] != const0_rtx)
1428 emit_move_insn (operands[0], operands[1]);
1429 nvptx_expand_oacc_fork (INTVAL (operands[2]));
1430 DONE;
1431 })
1432
;; OpenACC join; the counterpart of oacc_fork.  Unless operand 0 is the
;; constant zero, operand 1 is first copied into operand 0; the join
;; sequence is then produced by nvptx_expand_oacc_join () from the
;; constant in operand 2.  The RTL template is never emitted because the
;; expander ends with DONE.
1433 (define_expand "oacc_join"
1434 [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
1435 (match_operand:SI 1 "general_operand" ""))
1436 (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
1437 UNSPECV_JOIN)]
1438 ""
1439 {
1440 if (operands[0] != const0_rtx)
1441 emit_move_insn (operands[0], operands[1]);
1442 nvptx_expand_oacc_join (INTVAL (operands[2]));
1443 DONE;
1444 })
1445
;; Warp shuffle of a value in any mode of the BITS iterator.
;; Operand 1 is the value to exchange, operand 2 the lane argument
;; (register or immediate) and operand 3 a constant that selects the
;; shuffle variant through the "%S3" modifier.  The template always
;; emits the ".b32" form with a fixed last argument of 31.
;; NOTE(review): "%S3" presumably prints ".up"/".down"/".bfly"/".idx" --
;; confirm in the backend's operand printer.
1446 ;; only 32-bit shuffles exist.
1447 (define_insn "nvptx_shuffle<mode>"
1448 [(set (match_operand:BITS 0 "nvptx_register_operand" "=R")
1449 (unspec:BITS
1450 [(match_operand:BITS 1 "nvptx_register_operand" "R")
1451 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
1452 (match_operand:SI 3 "const_int_operand" "n")]
1453 UNSPEC_SHUFFLE))]
1454 ""
1455 "%.\\tshfl%S3.b32\\t%0, %1, %2, 31;")
1456
;; Warp vote: combines the BImode predicate in operand 1 into the SImode
;; result in operand 0 with the PTX "vote.ballot.b32" instruction.
1457 (define_insn "nvptx_vote_ballot"
1458 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1459 (unspec:SI [(match_operand:BI 1 "nvptx_register_operand" "R")]
1460 UNSPEC_VOTE_BALLOT))]
1461 ""
1462 "%.\\tvote.ballot.b32\\t%0, %1;")
1463
1464 ;; Patterns for OpenMP SIMD-via-SIMT lowering
1465
;; SIMT region entry, one insn per pointer mode of the P iterator (the
;; "@" prefix makes the generator take the mode as its first argument).
;; Operand 0 is the pointer-mode result; operands 1 and 2 may each be a
;; register or an immediate.  The assembly is produced by
;; nvptx_output_simt_enter ().  The omp_simt_enter expander below passes
;; the same operands through and uses operand 1 as a stack size and
;; operand 2 as an alignment.
1466 (define_insn "@omp_simt_enter_<mode>"
1467 [(set (match_operand:P 0 "nvptx_register_operand" "=R")
1468 (unspec_volatile:P [(match_operand:P 1 "nvptx_nonmemory_operand" "Ri")
1469 (match_operand:P 2 "nvptx_nonmemory_operand" "Ri")]
1470 UNSPECV_SIMT_ENTER))]
1471 ""
1472 {
1473 return nvptx_output_simt_enter (operands[0], operands[1], operands[2]);
1474 })
1475
;; Expander for SIMT region entry.  Besides emitting the Pmode variant of
;; the omp_simt_enter insn, it records per-function bookkeeping in
;; cfun->machine:
;;  - simt_stack_size: the running maximum of the constant sizes seen in
;;    operand 1, or HOST_WIDE_INT_M1U as soon as a non-constant size is
;;    requested;
;;  - simt_stack_align: the running maximum of the alignments seen in
;;    operand 2;
;;  - has_simtreg: set to true.
(define_expand "omp_simt_enter"
  [(match_operand 0 "nvptx_register_operand" "=R")
   (match_operand 1 "nvptx_nonmemory_operand" "Ri")
   (match_operand 2 "const_int_operand" "n")]
  ""
{
  if (CONST_INT_P (operands[1]))
    cfun->machine->simt_stack_size
      = MAX (UINTVAL (operands[1]), cfun->machine->simt_stack_size);
  else
    /* Size only known at run time.  */
    cfun->machine->simt_stack_size = HOST_WIDE_INT_M1U;

  cfun->machine->simt_stack_align
    = MAX (UINTVAL (operands[2]), cfun->machine->simt_stack_align);
  cfun->machine->has_simtreg = true;

  emit_insn (gen_omp_simt_enter (Pmode, operands[0], operands[1],
                                 operands[2]));
  DONE;
})
1493
;; Expander for SIMT region exit: simply forwards operand 0 to the Pmode
;; variant of the omp_simt_exit insn.
1494 (define_expand "omp_simt_exit"
1495 [(match_operand 0 "nvptx_register_operand" "R")]
1496 ""
1497 {
1498 emit_insn (gen_omp_simt_exit (Pmode, operands[0]));
1499 DONE;
1500 })
1501
;; SIMT region exit, one insn per pointer mode of the P iterator.  It
;; takes a single register operand and produces no result; the assembly
;; is generated by nvptx_output_simt_exit ().
1502 (define_insn "@omp_simt_exit_<mode>"
1503 [(unspec_volatile [(match_operand:P 0 "nvptx_register_operand" "R")]
1504 UNSPECV_SIMT_EXIT)]
1505 ""
1506 {
1507 return nvptx_output_simt_exit (operands[0]);
1508 })
1509
1510 ;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to lane index
;; The value is read from the PTX special register %laneid with a
;; 32-bit "mov"; the unspec has no real input (its only element is the
;; placeholder (const_int 0)).
1511 (define_insn "omp_simt_lane"
1512 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1513 (unspec:SI [(const_int 0)] UNSPEC_LANEID))]
1514 ""
1515 "%.\\tmov.u32\\t%0, %%laneid;")
1516
1517 ;; Implement IFN_GOMP_SIMT_ORDERED: copy operand 1 to operand 0 and
1518 ;; place a compiler barrier to disallow unrolling/peeling the containing loop
;; The barrier is the nvptx_nounroll insn, emitted right after the move.
1519 (define_expand "omp_simt_ordered"
1520 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1521 (match_operand:SI 1 "nvptx_register_operand" "R")]
1522 ""
1523 {
1524 emit_move_insn (operands[0], operands[1]);
1525 emit_insn (gen_nvptx_nounroll ());
1526 DONE;
1527 })
1528
1529 ;; Implement IFN_GOMP_SIMT_XCHG_BFLY: perform a "butterfly" exchange
1530 ;; across lanes
;; Operands 0 and 1 carry no mode in the template, so the insn sequence
;; is built by nvptx_gen_shuffle () with kind SHUFFLE_BFLY; operand 2 is
;; the SImode lane argument passed through to it.
1531 (define_expand "omp_simt_xchg_bfly"
1532 [(match_operand 0 "nvptx_register_operand" "=R")
1533 (match_operand 1 "nvptx_register_operand" "R")
1534 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
1535 ""
1536 {
1537 emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
1538 SHUFFLE_BFLY));
1539 DONE;
1540 })
1541
1542 ;; Implement IFN_GOMP_SIMT_XCHG_IDX: broadcast value in operand 1
1543 ;; from lane given by index in operand 2 to operand 0 in all lanes
;; Identical to omp_simt_xchg_bfly except that nvptx_gen_shuffle () is
;; called with kind SHUFFLE_IDX.
1544 (define_expand "omp_simt_xchg_idx"
1545 [(match_operand 0 "nvptx_register_operand" "=R")
1546 (match_operand 1 "nvptx_register_operand" "R")
1547 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
1548 ""
1549 {
1550 emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
1551 SHUFFLE_IDX));
1552 DONE;
1553 })
1554
;; Implement IFN_GOMP_SIMT_VOTE_ANY:
;; operand 0 becomes zero iff every lane supplies zero in operand 1.
;; Each lane first reduces its operand 1 to a BImode "is non-zero"
;; predicate; the ballot of those predicates is the result.
(define_expand "omp_simt_vote_any"
  [(match_operand:SI 0 "nvptx_register_operand" "=R")
   (match_operand:SI 1 "nvptx_register_operand" "R")]
  ""
{
  rtx lane_nonzero = gen_reg_rtx (BImode);
  rtx is_nonzero = gen_rtx_NE (BImode, operands[1], const0_rtx);
  emit_move_insn (lane_nonzero, is_nonzero);
  emit_insn (gen_nvptx_vote_ballot (operands[0], lane_nonzero));
  DONE;
})
1567
;; Implement IFN_GOMP_SIMT_LAST_LANE:
;; operand 0 becomes the lowest lane index whose operand 1 is non-zero.
;; The ballot of the per-lane "is non-zero" predicates is computed into
;; a temporary, and its count of trailing zeros (ctzsi2) is the index of
;; the lowest set bit.
(define_expand "omp_simt_last_lane"
  [(match_operand:SI 0 "nvptx_register_operand" "=R")
   (match_operand:SI 1 "nvptx_register_operand" "R")]
  ""
{
  rtx lane_nonzero = gen_reg_rtx (BImode);
  rtx ballot = gen_reg_rtx (SImode);
  rtx is_nonzero = gen_rtx_NE (BImode, operands[1], const0_rtx);
  emit_move_insn (lane_nonzero, is_nonzero);
  emit_insn (gen_nvptx_vote_ballot (ballot, lane_nonzero));
  emit_insn (gen_ctzsi2 (operands[0], ballot));
  DONE;
})
1582
1583 ;; extract parts of a 64 bit object into 2 32-bit ints
;; Operand 2 is the 64-bit source (any mode of the BITD iterator);
;; operands 0 and 1 are the two SImode results, tagged in the RTL as
;; parts 0 and 1 of an UNSPEC_BIT_CONV.  A single "mov.b64" with a
;; {%0,%1} destination pair performs the split.
1584 (define_insn "unpack<mode>si2"
1585 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1586 (unspec:SI [(match_operand:BITD 2 "nvptx_register_operand" "R")
1587 (const_int 0)] UNSPEC_BIT_CONV))
1588 (set (match_operand:SI 1 "nvptx_register_operand" "=R")
1589 (unspec:SI [(match_dup 2) (const_int 1)] UNSPEC_BIT_CONV))]
1590 ""
1591 "%.\\tmov.b64\\t{%0,%1}, %2;")
1592
1593 ;; pack 2 32-bit ints into a 64 bit object
;; The inverse of unpack<mode>si2: SImode operands 1 and 2 are combined
;; into the 64-bit operand 0 (any mode of the BITD iterator) by a single
;; "mov.b64" with a {%1,%2} source pair.
1594 (define_insn "packsi<mode>2"
1595 [(set (match_operand:BITD 0 "nvptx_register_operand" "=R")
1596 (unspec:BITD [(match_operand:SI 1 "nvptx_register_operand" "R")
1597 (match_operand:SI 2 "nvptx_register_operand" "R")]
1598 UNSPEC_BIT_CONV))]
1599 ""
1600 "%.\\tmov.b64\\t%0, {%1,%2};")
1601
1602 ;; Atomic insns.
1603
;; Standard atomic compare-and-swap expander for the SDIM modes.
;; The hardware CAS (atomic_compare_and_swap<mode>_1) returns the value
;; previously in memory; the boolean success result in operand 0 is
;; derived afterwards by comparing that old value with the expected one
;; and selecting 1 or 0.  The is_weak flag (operand 5) and the failure
;; memory model (operand 7) are not used; only the success model
;; (operand 6) is forwarded to the insn.
(define_expand "atomic_compare_and_swap<mode>"
  [(match_operand:SI 0 "nvptx_register_operand")	;; bool success output
   (match_operand:SDIM 1 "nvptx_register_operand")	;; oldval output
   (match_operand:SDIM 2 "memory_operand")		;; memory
   (match_operand:SDIM 3 "nvptx_register_operand")	;; expected input
   (match_operand:SDIM 4 "nvptx_register_operand")	;; newval input
   (match_operand:SI 5 "const_int_operand")		;; is_weak
   (match_operand:SI 6 "const_int_operand")		;; success model
   (match_operand:SI 7 "const_int_operand")]		;; failure model
  ""
{
  rtx oldval = operands[1];
  rtx expected = operands[3];

  emit_insn (gen_atomic_compare_and_swap<mode>_1 (oldval, operands[2],
                                                  expected, operands[4],
                                                  operands[6]));

  /* The swap succeeded exactly when memory held the expected value.  */
  rtx swapped = gen_reg_rtx (BImode);
  emit_move_insn (swapped, gen_rtx_EQ (BImode, oldval, expected));
  emit_insn (gen_sel_truesi (operands[0], swapped, const1_rtx, const0_rtx));
  DONE;
})
1623
;; The compare-and-swap instruction proper, for the SDIM modes.
;; Operand 0 receives the result of the PTX "atom...cas"; operand 1 is
;; the memory location (read and written, "+m"), operand 2 the expected
;; value and operand 3 the replacement value.  Operand 4 (memory model)
;; is part of the RTL but does not appear in the output template.
;; The second set marks the memory as modified by an opaque volatile
;; unspec.
;; NOTE(review): "%A1" presumably prints the address-space qualifier of
;; the memory operand -- confirm in the backend's operand printer.
1624 (define_insn "atomic_compare_and_swap<mode>_1"
1625 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1626 (unspec_volatile:SDIM
1627 [(match_operand:SDIM 1 "memory_operand" "+m")
1628 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
1629 (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
1630 (match_operand:SI 4 "const_int_operand")]
1631 UNSPECV_CAS))
1632 (set (match_dup 1)
1633 (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
1634 ""
1635 "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;"
1636 [(set_attr "atomic" "true")])
1637
;; Atomic exchange for the SDIM modes: operand 0 receives the result of
;; the PTX "atom...exch" on memory operand 1, and operand 2 (register or
;; immediate) is stored there.  The memory-model operand 3 is not used by
;; the output template.
1638 (define_insn "atomic_exchange<mode>"
1639 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output
1640 (unspec_volatile:SDIM
1641 [(match_operand:SDIM 1 "memory_operand" "+m") ;; memory
1642 (match_operand:SI 3 "const_int_operand")] ;; model
1643 UNSPECV_XCHG))
1644 (set (match_dup 1)
1645 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input
1646 ""
1647 "%.\\tatom%A1.exch.b%T0\\t%0, %1, %2;"
1648 [(set_attr "atomic" "true")])
1649
;; Atomic fetch-and-add for the SDIM modes.  The first set updates
;; memory operand 1 with (operand 1 + operand 2) wrapped in a volatile
;; UNSPECV_LOCK; the second set gives operand 0 the fetched value.  One
;; PTX "atom...add" performs both.  The memory-model operand 3 is not
;; used by the output template.
1650 (define_insn "atomic_fetch_add<mode>"
1651 [(set (match_operand:SDIM 1 "memory_operand" "+m")
1652 (unspec_volatile:SDIM
1653 [(plus:SDIM (match_dup 1)
1654 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
1655 (match_operand:SI 3 "const_int_operand")] ;; model
1656 UNSPECV_LOCK))
1657 (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1658 (match_dup 1))]
1659 ""
1660 "%.\\tatom%A1.add%t0\\t%0, %1, %2;"
1661 [(set_attr "atomic" "true")])
1662
;; SFmode variant of atomic_fetch_add<mode>: same RTL shape and same
;; output template, but the addend uses the floating-point constraint
;; "RF" instead of "Ri".  There is no corresponding DFmode pattern in
;; this part of the file.
1663 (define_insn "atomic_fetch_addsf"
1664 [(set (match_operand:SF 1 "memory_operand" "+m")
1665 (unspec_volatile:SF
1666 [(plus:SF (match_dup 1)
1667 (match_operand:SF 2 "nvptx_nonmemory_operand" "RF"))
1668 (match_operand:SI 3 "const_int_operand")] ;; model
1669 UNSPECV_LOCK))
1670 (set (match_operand:SF 0 "nvptx_register_operand" "=R")
1671 (match_dup 1))]
1672 ""
1673 "%.\\tatom%A1.add%t0\\t%0, %1, %2;"
1674 [(set_attr "atomic" "true")])
1675
;; any_logic iterates over the three bitwise RTL codes; the "logic"
;; attribute maps each one to the PTX operation name (note ior -> "or").
1676 (define_code_iterator any_logic [and ior xor])
1677 (define_code_attr logic [(and "and") (ior "or") (xor "xor")])
1678
;; Atomic fetch-and-{and,or,xor} for the SDIM modes, with the same RTL
;; shape as atomic_fetch_add<mode>.  In the emitted instruction the
;; ".b<size>" type precedes the operation name.  The insn condition
;; enables the SImode variants unconditionally and any other mode only
;; when TARGET_SM35 is set.
1679 (define_insn "atomic_fetch_<logic><mode>"
1680 [(set (match_operand:SDIM 1 "memory_operand" "+m")
1681 (unspec_volatile:SDIM
1682 [(any_logic:SDIM (match_dup 1)
1683 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
1684 (match_operand:SI 3 "const_int_operand")] ;; model
1685 UNSPECV_LOCK))
1686 (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1687 (match_dup 1))]
1688 "<MODE>mode == SImode || TARGET_SM35"
1689 "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;"
1690 [(set_attr "atomic" "true")])
1691
;; Atomic test-and-set on a QImode memory location.  No instruction is
;; generated inline; the expander emits a call to the library function
;; __atomic_test_and_set_1, passing the address of the memory operand
;; (converted to ptr_mode) and the memory model, and places the SImode
;; return value in operand 0.
(define_expand "atomic_test_and_set"
  [(match_operand:SI 0 "nvptx_register_operand")	;; bool success output
   (match_operand:QI 1 "memory_operand")		;; memory
   (match_operand:SI 2 "const_int_operand")]		;; model
  ""
{
  rtx tas_fn = init_one_libfunc ("__atomic_test_and_set_1");
  rtx byte_addr = convert_memory_address (ptr_mode, XEXP (operands[1], 0));

  emit_library_call_value (tas_fn, operands[0], LCT_NORMAL, SImode,
                           byte_addr, ptr_mode,
                           operands[2], SImode);
  DONE;
})
1707
;; Barrier synchronization.  Operand 0 (register or immediate) is always
;; printed as the first argument of "bar.sync"; constant operand 1 is
;; printed as a second argument only when it is non-zero.  The template
;; has no "%." prefix and the insn is marked not predicable.
(define_insn "nvptx_barsync"
  [(unspec_volatile [(match_operand:SI 0 "nvptx_nonmemory_operand" "Ri")
                     (match_operand:SI 1 "const_int_operand")]
                    UNSPECV_BARSYNC)]
  ""
{
  /* A zero second operand selects the one-argument form.  */
  return (INTVAL (operands[1]) == 0
          ? "\\tbar.sync\\t%0;"
          : "\\tbar.sync\\t%0, %1;");
}
  [(set_attr "predicable" "false")])
1720
;; Standard "memory_barrier" expander.  Operand 0 is not supplied by the
;; caller: the preparation code creates a volatile BLKmode MEM whose
;; address is a Pmode SCRATCH, and the pattern sets that MEM from an
;; UNSPECV_MEMBAR of itself.  The result is matched by the
;; *memory_barrier insn.
;; NOTE(review): the volatile BLKmode MEM presumably makes the insn act
;; as a dependency on all of memory -- this is the usual GCC idiom, but
;; confirm.
1721 (define_expand "memory_barrier"
1722 [(set (match_dup 0)
1723 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
1724 ""
1725 {
1726 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
1727 MEM_VOLATILE_P (operands[0]) = 1;
1728 })
1729
1730 ;; Ptx defines the memory barriers membar.cta, membar.gl and membar.sys
1731 ;; (corresponding to cuda functions threadfence_block, threadfence and
1732 ;; threadfence_system). For the insn memory_barrier we use membar.sys. This
1733 ;; may be overconservative, but before using membar.gl instead we'll need to
1734 ;; explain in detail why it's safe to use. For now, use membar.sys.
;; This insn matches the RTL produced by the memory_barrier expander.
;; Its template has no "%." prefix and the insn is marked not predicable.
1735 (define_insn "*memory_barrier"
1736 [(set (match_operand:BLK 0 "" "")
1737 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
1738 ""
1739 "\\tmembar.sys;"
1740 [(set_attr "predicable" "false")])
1741
;; Expander for a CTA-scope memory barrier.  Built exactly like
;; memory_barrier (a volatile BLKmode MEM on a Pmode SCRATCH address set
;; from an unspec of itself) but tagged UNSPECV_MEMBAR_CTA, so that it is
;; matched by the *nvptx_membar_cta insn instead.
1742 (define_expand "nvptx_membar_cta"
1743 [(set (match_dup 0)
1744 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
1745 ""
1746 {
1747 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
1748 MEM_VOLATILE_P (operands[0]) = 1;
1749 })
1750
;; Matches the RTL produced by the nvptx_membar_cta expander and emits
;; the PTX "membar.cta" instruction.  Not predicable.
1751 (define_insn "*nvptx_membar_cta"
1752 [(set (match_operand:BLK 0 "" "")
1753 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
1754 ""
1755 "\\tmembar.cta;"
1756 [(set_attr "predicable" "false")])
1757
;; Emits the PTX directive  .pragma "nounroll";  (the backslash runs in
;; the template are escapes for the embedded double quotes).  Being an
;; unspec_volatile, the insn also acts as a barrier for the RTL
;; optimizers; omp_simt_ordered uses it for that purpose.
1758 (define_insn "nvptx_nounroll"
1759 [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)]
1760 ""
1761 "\\t.pragma \\\"nounroll\\\";"
1762 [(set_attr "predicable" "false")])
1763
;; Sets DImode operand 0 from a volatile UNSPECV_RED_PART of the constant
;; in operand 1.  The assembly is produced by
;; nvptx_output_red_partition ().  Not predicable.
;; NOTE(review): judging by the name this yields a reduction-buffer
;; partition for the given level -- confirm in nvptx.c.
1764 (define_insn "nvptx_red_partition"
1765 [(set (match_operand:DI 0 "nonimmediate_operand" "=R")
1766 (unspec_volatile:DI [(match_operand:DI 1 "const_int_operand")]
1767 UNSPECV_RED_PART))]
1768 ""
1769 {
1770 return nvptx_output_red_partition (operands[0], operands[1]);
1771 }
1772 [(set_attr "predicable" "false")])