preprocessor: Simplify read_main
[gcc.git] / gcc / config / nvptx / nvptx.md
1 ;; Machine description for NVPTX.
2 ;; Copyright (C) 2014-2020 Free Software Foundation, Inc.
3 ;; Contributed by Bernd Schmidt <bernds@codesourcery.com>
4 ;;
5 ;; This file is part of GCC.
6 ;;
7 ;; GCC is free software; you can redistribute it and/or modify
8 ;; it under the terms of the GNU General Public License as published by
9 ;; the Free Software Foundation; either version 3, or (at your option)
10 ;; any later version.
11 ;;
12 ;; GCC is distributed in the hope that it will be useful,
13 ;; but WITHOUT ANY WARRANTY; without even the implied warranty of
14 ;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 ;; GNU General Public License for more details.
16 ;;
17 ;; You should have received a copy of the GNU General Public License
18 ;; along with GCC; see the file COPYING3. If not see
19 ;; <http://www.gnu.org/licenses/>.
20
;; UNSPEC_* codes: tags carried by (unspec ...) expressions.  Within this
;; part of the file they are referenced by load_arg_reg (ARG_REG), copysign,
;; bitrev, the sin/cos/log2/exp2 patterns, the *_uni branches (BR_UNIFIED)
;; and the FPINT int iterator; the remaining codes are presumably used by
;; patterns further down the file.
21 (define_c_enum "unspec" [
22 UNSPEC_ARG_REG
23
24 UNSPEC_COPYSIGN
25 UNSPEC_LOG2
26 UNSPEC_EXP2
27 UNSPEC_SIN
28 UNSPEC_COS
29
30 UNSPEC_FPINT_FLOOR
31 UNSPEC_FPINT_BTRUNC
32 UNSPEC_FPINT_CEIL
33 UNSPEC_FPINT_NEARBYINT
34
35 UNSPEC_BITREV
36
37 UNSPEC_ALLOCA
38
39 UNSPEC_SET_SOFTSTACK
40
41 UNSPEC_DIM_SIZE
42
43 UNSPEC_BIT_CONV
44
45 UNSPEC_VOTE_BALLOT
46
47 UNSPEC_LANEID
48
49 UNSPEC_SHUFFLE
50 UNSPEC_BR_UNIFIED
51 ])
52
;; UNSPECV_* codes, declared in the separate "unspecv" enum.  None of them is
;; referenced in the part of the file visible here; presumably they tag
;; unspec_volatile patterns defined later -- confirm against the full file.
53 (define_c_enum "unspecv" [
54 UNSPECV_LOCK
55 UNSPECV_CAS
56 UNSPECV_XCHG
57 UNSPECV_BARSYNC
58 UNSPECV_MEMBAR
59 UNSPECV_MEMBAR_CTA
60 UNSPECV_DIM_POS
61
62 UNSPECV_FORK
63 UNSPECV_FORKED
64 UNSPECV_JOINING
65 UNSPECV_JOIN
66
67 UNSPECV_NOUNROLL
68
69 UNSPECV_SIMT_ENTER
70 UNSPECV_SIMT_EXIT
71
72 UNSPECV_RED_PART
73 ])
74
;; Insn attribute "subregs_ok": false by default.  The move, extension and
;; truncation patterns below set it to "true".  How the attribute is consumed
;; is not visible in this file chunk.
75 (define_attr "subregs_ok" "false,true"
76 (const_string "false"))
77
;; Insn attribute "atomic": false by default.  No pattern in this chunk sets
;; it; presumably the atomic patterns later in the file do.
78 (define_attr "atomic" "false,true"
79 (const_string "false"))
80
81 ;; The nvptx operand predicates, in general, do not permit subregs and
82 ;; accept only literal constants.  This differs from the generic predicates,
83 ;; which permit subregs and symbolic constants (as appropriate).
;; Accepts only rtxes whose code is REG (so a SUBREG is rejected by the
;; match_code) and which additionally satisfy the generic register_operand
;; test for MODE.
84 (define_predicate "nvptx_register_operand"
85 (match_code "reg")
86 {
87 return register_operand (op, mode);
88 })
89
;; Accepts an rtx whose code is REG or MEM (no SUBREG).  A REG must also pass
;; the generic register_operand test; a MEM must pass memory_operand.
(define_predicate "nvptx_nonimmediate_operand"
  (match_code "mem,reg")
{
  if (REG_P (op))
    return register_operand (op, mode);

  /* match_code leaves MEM as the only other possibility.  */
  return memory_operand (op, mode);
})
96
;; Accepts an rtx whose code is REG, CONST_INT or CONST_DOUBLE.  A REG must
;; also pass the generic register_operand test; the constant codes must pass
;; immediate_operand.
(define_predicate "nvptx_nonmemory_operand"
  (match_code "reg,const_int,const_double")
{
  if (REG_P (op))
    return register_operand (op, mode);

  /* Only CONST_INT / CONST_DOUBLE can reach this point.  */
  return immediate_operand (op, mode);
})
103
;; True only for the shared CONST_INT zero object (pointer comparison with
;; const0_rtx).
104 (define_predicate "const0_operand"
105 (and (match_code "const_int")
106 (match_test "op == const0_rtx")))
107
108 ;; True if this operator is valid for predication.
;; Only EQ and NE qualify; used by define_cond_exec and cbranchbi4 below.
109 (define_predicate "predicate_operator"
110 (match_code "eq,ne"))
111
;; Matches the NE code only (used by the cstorebi4 expander).
112 (define_predicate "ne_operator"
113 (match_code "ne"))
114
;; Comparison codes accepted for integer operands: equality plus the signed
;; and unsigned orderings.
115 (define_predicate "nvptx_comparison_operator"
116 (match_code "eq,ne,le,ge,lt,gt,leu,geu,ltu,gtu"))
117
;; Comparison codes accepted for floating-point operands: the ordered
;; comparisons, their UN* counterparts, and the ORDERED/UNORDERED tests.
118 (define_predicate "nvptx_float_comparison_operator"
119 (match_code "eq,ne,le,ge,lt,gt,uneq,unle,unge,unlt,ungt,unordered,ordered"))
120
;; Valid callee operand for a call instruction: any REG, or a SYMBOL_REF that
;; is flagged as referring to a function.
(define_predicate "call_insn_operand"
  (match_code "symbol_ref,reg")
{
  if (REG_P (op))
    return true;

  /* Otherwise OP is a SYMBOL_REF; it must name a function.  */
  return SYMBOL_REF_FUNCTION_P (op);
})
127
;; True if OP is a PARALLEL in which every element after the first is a
;; (use (reg ...)).  Element 0 is deliberately not inspected here; the
;; patterns using this predicate via match_parallel describe it themselves.
(define_predicate "call_operation"
  (match_code "parallel")
{
  const int n_elts = XVECLEN (op, 0);
  int idx = 1;

  while (idx < n_elts)
    {
      rtx use = XVECEXP (op, 0, idx);

      /* Anything but a USE of a register disqualifies the whole vector.  */
      if (!(GET_CODE (use) == USE && REG_P (XEXP (use, 0))))
        return false;
      idx++;
    }

  return true;
})
144
;; Insn attribute "predicable": true by default, so patterns opt out
;; explicitly (the conditional branch patterns below set it to "false").
145 (define_attr "predicable" "false,true"
146 (const_string "true"))
147
;; Conditional-execution template: the condition is an EQ/NE test
;; (predicate_operator) of a BImode register against constant zero.  Both the
;; insn condition and the output prefix are empty strings here.
148 (define_cond_exec
149 [(match_operator 0 "predicate_operator"
150 [(match_operand:BI 1 "nvptx_register_operand" "")
151 (match_operand:BI 2 "const0_operand" "")])]
152 ""
153 ""
154 )
155
;; Constraint letters used by the patterns below.  P0 / P1 / Pn match the
;; CONST_INT values 0, 1 and -1 respectively.
156 (define_constraint "P0"
157 "An integer with the value 0."
158 (and (match_code "const_int")
159 (match_test "ival == 0")))
160
161 (define_constraint "P1"
162 "An integer with the value 1."
163 (and (match_code "const_int")
164 (match_test "ival == 1")))
165
166 (define_constraint "Pn"
167 "An integer with the value -1."
168 (and (match_code "const_int")
169 (match_test "ival == -1")))
170
;; "R" matches any rtx with code REG; the test itself does not distinguish
;; pseudo from hard registers, despite the description string.
171 (define_constraint "R"
172 "A pseudo register."
173 (match_code "reg"))
174
;; "Ia" matches every CONST_INT (the match_test is unconditionally true).
175 (define_constraint "Ia"
176 "Any integer constant."
177 (and (match_code "const_int") (match_test "true")))
178
;; Mode iterators.  Each name stands for the bracketed list of machine modes;
;; a pattern written with <mode>/<MODE> and one of these iterators is
;; instantiated once per listed mode.
179 (define_mode_iterator QHSDISDFM [QI HI SI DI SF DF])
180 (define_mode_iterator QHSDIM [QI HI SI DI])
181 (define_mode_iterator HSDIM [HI SI DI])
182 (define_mode_iterator BHSDIM [BI HI SI DI])
183 (define_mode_iterator SDIM [SI DI])
184 (define_mode_iterator SDISDFM [SI DI SF DF])
185 (define_mode_iterator QHIM [QI HI])
186 (define_mode_iterator QHSIM [QI HI SI])
187 (define_mode_iterator SDFM [SF DF])
188 (define_mode_iterator SDCM [SC DC])
189 (define_mode_iterator BITS [SI SF])
190 (define_mode_iterator BITD [DI DF])
191 (define_mode_iterator VECIM [V2SI V2DI])
192
193 ;; This mode iterator allows :P to be used for patterns that operate on
194 ;; pointer-sized quantities. Exactly one of the two alternatives will match.
;; Each entry carries a condition on Pmode, which is what selects the single
;; active alternative.
195 (define_mode_iterator P [(SI "Pmode == SImode") (DI "Pmode == DImode")])
196
197 ;; We should get away with not defining memory alternatives, since we don't
198 ;; get variables in this mode and pseudos are never spilled.
;; BImode move into a register.  Three alternatives:
;;   0: register source            -> mov
;;   1: constant 0  (P0)           -> setp.eq.u32 comparing 1 with 0
;;   2: constant -1 (Pn)           -> setp.eq.u32 comparing 1 with 1
199 (define_insn "movbi"
200 [(set (match_operand:BI 0 "nvptx_register_operand" "=R,R,R")
201 (match_operand:BI 1 "nvptx_nonmemory_operand" "R,P0,Pn"))]
202 ""
203 "@
204 %.\\tmov%t0\\t%0, %1;
205 %.\\tsetp.eq.u32\\t%0, 1, 0;
206 %.\\tsetp.eq.u32\\t%0, 1, 1;")
207
;; Move for the V2SI / V2DI vector modes.  The insn condition forbids a
;; memory destination unless the source is a register.  Alternative 1 is a
;; load (memory source), alternative 2 a store (memory destination); the
;; register/immediate case is delegated to nvptx_output_mov_insn.
(define_insn "*mov<mode>_insn"
  [(set (match_operand:VECIM 0 "nonimmediate_operand" "=R,R,m")
        (match_operand:VECIM 1 "general_operand" "Ri,m,R"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
{
  switch (which_alternative)
    {
    case 1:
      /* Memory -> register.  */
      return "%.\\tld%A1%u1\\t%0, %1;";
    case 2:
      /* Register -> memory.  */
      return "%.\\tst%A0%u0\\t%0, %1;";
    default:
      return nvptx_output_mov_insn (operands[0], operands[1]);
    }
}
  [(set_attr "subregs_ok" "true")])
221
;; Move for the QI / HI / SI / DI integer modes.  The insn condition forbids
;; a memory destination unless the source is a register.  Alternative 1 is a
;; load, alternative 2 a store; the register/immediate case is delegated to
;; nvptx_output_mov_insn.
(define_insn "*mov<mode>_insn"
  [(set (match_operand:QHSDIM 0 "nonimmediate_operand" "=R,R,m")
        (match_operand:QHSDIM 1 "general_operand" "Ri,m,R"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
{
  switch (which_alternative)
    {
    case 1:
      /* Memory -> register.  */
      return "%.\\tld%A1%u1\\t%0, %1;";
    case 2:
      /* Register -> memory.  */
      return "%.\\tst%A0%u0\\t%0, %1;";
    default:
      return nvptx_output_mov_insn (operands[0], operands[1]);
    }
}
  [(set_attr "subregs_ok" "true")])
235
;; Move for the SF / DF floating-point modes.  Same shape as the integer
;; variant, with "F" instead of "i" for the constant source.
;; NOTE(review): unlike the integer and vector variants, the load uses %u0 and
;; the store uses %u1, i.e. the type modifier is applied to the register
;; operand rather than the memory operand.  This looks intentional -- confirm
;; against the target's operand printer before "harmonising" it.
(define_insn "*mov<mode>_insn"
  [(set (match_operand:SDFM 0 "nonimmediate_operand" "=R,R,m")
        (match_operand:SDFM 1 "general_operand" "RF,m,R"))]
  "!MEM_P (operands[0]) || REG_P (operands[1])"
{
  switch (which_alternative)
    {
    case 1:
      /* Memory -> register.  */
      return "%.\\tld%A1%u0\\t%0, %1;";
    case 2:
      /* Register -> memory.  */
      return "%.\\tst%A0%u1\\t%0, %1;";
    default:
      return nvptx_output_mov_insn (operands[0], operands[1]);
    }
}
  [(set_attr "subregs_ok" "true")])
249
;; Load incoming-argument register number <operand 1> (printed as %arN) into
;; a QI/HI register.  This narrow variant emits a cvt whose source type is
;; spelled .u32 in the template.
250 (define_insn "load_arg_reg<mode>"
251 [(set (match_operand:QHIM 0 "nvptx_register_operand" "=R")
252 (unspec:QHIM [(match_operand 1 "const_int_operand" "n")]
253 UNSPEC_ARG_REG))]
254 ""
255 "%.\\tcvt%t0.u32\\t%0, %%ar%1;")
256
;; Load incoming-argument register number <operand 1> (printed as %arN) into
;; an SI/DI/SF/DF register with a plain mov.
257 (define_insn "load_arg_reg<mode>"
258 [(set (match_operand:SDISDFM 0 "nvptx_register_operand" "=R")
259 (unspec:SDISDFM [(match_operand 1 "const_int_operand" "n")]
260 UNSPEC_ARG_REG))]
261 ""
262 "%.\\tmov%t0\\t%0, %%ar%1;")
263
;; Move expander for the V2SI / V2DI vector modes.  The matching insn only
;; allows a memory destination when the source is a register, so any other
;; store is split into "source -> fresh pseudo -> memory".
(define_expand "mov<mode>"
  [(set (match_operand:VECIM 0 "nonimmediate_operand" "")
        (match_operand:VECIM 1 "general_operand" ""))]
  ""
{
  const bool store_needs_reg = MEM_P (operands[0]) && !REG_P (operands[1]);

  if (store_needs_reg)
    {
      rtx scratch = gen_reg_rtx (<MODE>mode);

      emit_move_insn (scratch, operands[1]);
      emit_move_insn (operands[0], scratch);
      DONE;
    }
})
277
;; Move expander for the scalar integer and floating-point modes.  A store
;; whose source is not a register is routed through a fresh pseudo.  For all
;; other moves, a LABEL_REF source is diagnosed with sorry () and the default
;; pattern is still emitted.
(define_expand "mov<mode>"
  [(set (match_operand:QHSDISDFM 0 "nonimmediate_operand" "")
        (match_operand:QHSDISDFM 1 "general_operand" ""))]
  ""
{
  const bool store_needs_reg = MEM_P (operands[0]) && !REG_P (operands[1]);

  if (store_needs_reg)
    {
      rtx scratch = gen_reg_rtx (<MODE>mode);

      emit_move_insn (scratch, operands[1]);
      emit_move_insn (operands[0], scratch);
      DONE;
    }

  /* Taking the address of a label is not supported on this target.  */
  if (GET_CODE (operands[1]) == LABEL_REF)
    sorry ("target cannot support label values");
})
294
;; Zero-extend QI to HI.  Alternative 0 (register source) emits a cvt;
;; alternative 1 (memory source) emits a load with a .u8 type suffix.
295 (define_insn "zero_extendqihi2"
296 [(set (match_operand:HI 0 "nvptx_register_operand" "=R,R")
297 (zero_extend:HI (match_operand:QI 1 "nvptx_nonimmediate_operand" "R,m")))]
298 ""
299 "@
300 %.\\tcvt.u16.u%T1\\t%0, %1;
301 %.\\tld%A1.u8\\t%0, %1;"
302 [(set_attr "subregs_ok" "true")])
303
;; Zero-extend QI/HI to SI.  Register source -> cvt.u32; memory source ->
;; load typed by the source operand (%T1).
304 (define_insn "zero_extend<mode>si2"
305 [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
306 (zero_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
307 ""
308 "@
309 %.\\tcvt.u32.u%T1\\t%0, %1;
310 %.\\tld%A1.u%T1\\t%0, %1;"
311 [(set_attr "subregs_ok" "true")])
312
;; Zero-extend QI/HI/SI to DI.  Register source -> cvt.u64; memory source ->
;; load.  Note the load alternative uses the %u1 modifier here, whereas the
;; SI variant above spells the suffix as ".u%T1".
313 (define_insn "zero_extend<mode>di2"
314 [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
315 (zero_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
316 ""
317 "@
318 %.\\tcvt.u64.u%T1\\t%0, %1;
319 %.\\tld%A1%u1\\t%0, %1;"
320 [(set_attr "subregs_ok" "true")])
321
;; Sign-extend QI/HI to SI.  Register source -> cvt.s32; memory source ->
;; load with a signed type suffix taken from the source operand.
322 (define_insn "extend<mode>si2"
323 [(set (match_operand:SI 0 "nvptx_register_operand" "=R,R")
324 (sign_extend:SI (match_operand:QHIM 1 "nvptx_nonimmediate_operand" "R,m")))]
325 ""
326 "@
327 %.\\tcvt.s32.s%T1\\t%0, %1;
328 %.\\tld%A1.s%T1\\t%0, %1;"
329 [(set_attr "subregs_ok" "true")])
330
;; Sign-extend QI/HI/SI to DI.  Register source -> cvt.s64; memory source ->
;; load with a signed type suffix taken from the source operand.
331 (define_insn "extend<mode>di2"
332 [(set (match_operand:DI 0 "nvptx_register_operand" "=R,R")
333 (sign_extend:DI (match_operand:QHSIM 1 "nvptx_nonimmediate_operand" "R,m")))]
334 ""
335 "@
336 %.\\tcvt.s64.s%T1\\t%0, %1;
337 %.\\tld%A1.s%T1\\t%0, %1;"
338 [(set_attr "subregs_ok" "true")])
339
;; Truncate HI to QI.  Register destination -> cvt from .u16; memory
;; destination -> store with a .u8 type suffix.
340 (define_insn "trunchiqi2"
341 [(set (match_operand:QI 0 "nvptx_nonimmediate_operand" "=R,m")
342 (truncate:QI (match_operand:HI 1 "nvptx_register_operand" "R,R")))]
343 ""
344 "@
345 %.\\tcvt%t0.u16\\t%0, %1;
346 %.\\tst%A0.u8\\t%0, %1;"
347 [(set_attr "subregs_ok" "true")])
348
;; Truncate SI to QI/HI.  Register destination -> cvt from .u32; memory
;; destination -> store typed by the destination operand (%T0).
349 (define_insn "truncsi<mode>2"
350 [(set (match_operand:QHIM 0 "nvptx_nonimmediate_operand" "=R,m")
351 (truncate:QHIM (match_operand:SI 1 "nvptx_register_operand" "R,R")))]
352 ""
353 "@
354 %.\\tcvt%t0.u32\\t%0, %1;
355 %.\\tst%A0.u%T0\\t%0, %1;"
356 [(set_attr "subregs_ok" "true")])
357
;; Truncate DI to QI/HI/SI.  Register destination -> cvt from .u64; memory
;; destination -> store typed by the destination operand (%T0).
358 (define_insn "truncdi<mode>2"
359 [(set (match_operand:QHSIM 0 "nvptx_nonimmediate_operand" "=R,m")
360 (truncate:QHSIM (match_operand:DI 1 "nvptx_register_operand" "R,R")))]
361 ""
362 "@
363 %.\\tcvt%t0.u64\\t%0, %1;
364 %.\\tst%A0.u%T0\\t%0, %1;"
365 [(set_attr "subregs_ok" "true")])
366
367 ;; Integer arithmetic
368
;; Integer addition for HI/SI/DI: operand 0 = operand 1 + operand 2.
;; Operand 2 may be a register or an immediate.
369 (define_insn "add<mode>3"
370 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
371 (plus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
372 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
373 ""
374 "%.\\tadd%t0\\t%0, %1, %2;")
375
;; Three-register SImode sum (op1 + op2) + op3, matched as a single insn and
;; emitted as a vadd instruction with a secondary .add operation.
376 (define_insn "*vadd_addsi4"
377 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
378 (plus:SI (plus:SI (match_operand:SI 1 "nvptx_register_operand" "R")
379 (match_operand:SI 2 "nvptx_register_operand" "R"))
380 (match_operand:SI 3 "nvptx_register_operand" "R")))]
381 ""
382 "%.\\tvadd%t0%t1%t2.add\\t%0, %1, %2, %3;")
383
;; SImode (op1 - op2) + op3 on three registers, matched as a single insn and
;; emitted as a vsub instruction with a secondary .add operation.
384 (define_insn "*vsub_addsi4"
385 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
386 (plus:SI (minus:SI (match_operand:SI 1 "nvptx_register_operand" "R")
387 (match_operand:SI 2 "nvptx_register_operand" "R"))
388 (match_operand:SI 3 "nvptx_register_operand" "R")))]
389 ""
390 "%.\\tvsub%t0%t1%t2.add\\t%0, %1, %2, %3;")
391
;; Integer subtraction for HI/SI/DI: operand 0 = operand 1 - operand 2.
;; Both inputs must be registers (no immediate alternative, unlike add).
392 (define_insn "sub<mode>3"
393 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
394 (minus:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
395 (match_operand:HSDIM 2 "nvptx_register_operand" "R")))]
396 ""
397 "%.\\tsub%t0\\t%0, %1, %2;")
398
;; Same-width integer multiply for HI/SI/DI, emitted as mul.lo.  Operand 2
;; may be a register or an immediate.
399 (define_insn "mul<mode>3"
400 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
401 (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
402 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
403 ""
404 "%.\\tmul.lo%t0\\t%0, %1, %2;")
405
;; Integer multiply-add for HI/SI/DI: operand 0 = operand 1 * operand 2 +
;; operand 3, emitted as mad.lo.  Operands 2 and 3 may be immediates.
406 (define_insn "*mad<mode>3"
407 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
408 (plus:HSDIM (mult:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
409 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri"))
410 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
411 ""
412 "%.\\tmad.lo%t0\\t%0, %1, %2, %3;")
413
;; Signed integer division for HI/SI/DI, emitted as div.s<size>.  The
;; divisor may be a register or an immediate.
414 (define_insn "div<mode>3"
415 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
416 (div:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
417 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
418 ""
419 "%.\\tdiv.s%T0\\t%0, %1, %2;")
420
;; Unsigned integer division for HI/SI/DI, emitted as div.u<size>.  The
;; divisor may be a register or an immediate.
421 (define_insn "udiv<mode>3"
422 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
423 (udiv:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
424 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
425 ""
426 "%.\\tdiv.u%T0\\t%0, %1, %2;")
427
;; Signed integer remainder for HI/SI/DI, emitted as rem.s<size>.
;; Operand 1 uses nvptx_register_operand, which only admits REG rtxes, so its
;; constraint is plain "R": an immediate alternative there could never match.
;; This mirrors div<mode>3.  The divisor may be a register or an immediate.
(define_insn "mod<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (mod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                   (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\trem.s%T0\\t%0, %1, %2;")
434
;; Unsigned integer remainder for HI/SI/DI, emitted as rem.u<size>.
;; Operand 1 uses nvptx_register_operand, which only admits REG rtxes, so its
;; constraint is plain "R": an immediate alternative there could never match.
;; This mirrors udiv<mode>3.  The divisor may be a register or an immediate.
(define_insn "umod<mode>3"
  [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
        (umod:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
                    (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
  ""
  "%.\\trem.u%T0\\t%0, %1, %2;")
441
;; Integer minimum / maximum for HI/SI/DI.  The four patterns differ only in
;; the RTL code and in the signedness letter (.s / .u) of the emitted min/max.
;; Operand 2 may be a register or an immediate in all of them.

;; Signed minimum.
442 (define_insn "smin<mode>3"
443 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
444 (smin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
445 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
446 ""
447 "%.\\tmin.s%T0\\t%0, %1, %2;")
448
;; Unsigned minimum.
449 (define_insn "umin<mode>3"
450 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
451 (umin:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
452 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
453 ""
454 "%.\\tmin.u%T0\\t%0, %1, %2;")
455
;; Signed maximum.
456 (define_insn "smax<mode>3"
457 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
458 (smax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
459 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
460 ""
461 "%.\\tmax.s%T0\\t%0, %1, %2;")
462
;; Unsigned maximum.
463 (define_insn "umax<mode>3"
464 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
465 (umax:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")
466 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
467 ""
468 "%.\\tmax.u%T0\\t%0, %1, %2;")
469
;; Integer unary operations for HI/SI/DI, register to register.

;; Absolute value, emitted as abs.s<size>.
470 (define_insn "abs<mode>2"
471 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
472 (abs:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
473 ""
474 "%.\\tabs.s%T0\\t%0, %1;")
475
;; Negation, emitted as neg.s<size>.
476 (define_insn "neg<mode>2"
477 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
478 (neg:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
479 ""
480 "%.\\tneg.s%T0\\t%0, %1;")
481
;; Bitwise complement, emitted as not.b<size>.
482 (define_insn "one_cmpl<mode>2"
483 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
484 (not:HSDIM (match_operand:HSDIM 1 "nvptx_register_operand" "R")))]
485 ""
486 "%.\\tnot.b%T0\\t%0, %1;")
487
;; Bit reversal for SI/DI, represented as UNSPEC_BITREV and emitted as
;; brev.b<size>.  Used by the ctz<mode>2 expander below.
488 (define_insn "bitrev<mode>2"
489 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
490 (unspec:SDIM [(match_operand:SDIM 1 "nvptx_register_operand" "R")]
491 UNSPEC_BITREV))]
492 ""
493 "%.\\tbrev.b%T0\\t%0, %1;")
494
;; Count leading zeros of an SI/DI register.  The result is always SImode;
;; the instruction's size suffix comes from the source operand (%T1).
495 (define_insn "clz<mode>2"
496 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
497 (clz:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
498 ""
499 "%.\\tclz.b%T1\\t%0, %1;")
500
;; Count trailing zeros of an SI/DI register (SImode result).  There is no
;; direct insn pattern for it here; it is expanded as a bit reversal of the
;; input into a fresh pseudo, followed by a count of leading zeros.
(define_expand "ctz<mode>2"
  [(set (match_operand:SI 0 "nvptx_register_operand" "")
        (ctz:SI (match_operand:SDIM 1 "nvptx_register_operand" "")))]
  ""
{
  /* Holds the bit-reversed copy of the input, in the input's mode.  */
  rtx reversed = gen_reg_rtx (<MODE>mode);

  emit_insn (gen_bitrev<mode>2 (reversed, operands[1]));
  emit_insn (gen_clz<mode>2 (operands[0], reversed));
  DONE;
})
511
;; Population count of an SI/DI register.  The result is always SImode; the
;; instruction's size suffix comes from the source operand (%T1).
512 (define_insn "popcount<mode>2"
513 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
514 (popcount:SI (match_operand:SDIM 1 "nvptx_register_operand" "R")))]
515 ""
516 "%.\\tpopc.b%T1\\t%0, %1;")
517
518 ;; Multiplication variants
519
;; Widening multiplies: both inputs are extended to twice their width and the
;; full product is produced, emitted as mul.wide.  All operands are registers.

;; Signed HI x HI -> SI.
520 (define_insn "mulhisi3"
521 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
522 (mult:SI (sign_extend:SI
523 (match_operand:HI 1 "nvptx_register_operand" "R"))
524 (sign_extend:SI
525 (match_operand:HI 2 "nvptx_register_operand" "R"))))]
526 ""
527 "%.\\tmul.wide.s16\\t%0, %1, %2;")
528
;; Signed SI x SI -> DI.
529 (define_insn "mulsidi3"
530 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
531 (mult:DI (sign_extend:DI
532 (match_operand:SI 1 "nvptx_register_operand" "R"))
533 (sign_extend:DI
534 (match_operand:SI 2 "nvptx_register_operand" "R"))))]
535 ""
536 "%.\\tmul.wide.s32\\t%0, %1, %2;")
537
;; Unsigned HI x HI -> SI.
538 (define_insn "umulhisi3"
539 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
540 (mult:SI (zero_extend:SI
541 (match_operand:HI 1 "nvptx_register_operand" "R"))
542 (zero_extend:SI
543 (match_operand:HI 2 "nvptx_register_operand" "R"))))]
544 ""
545 "%.\\tmul.wide.u16\\t%0, %1, %2;")
546
;; Unsigned SI x SI -> DI.
547 (define_insn "umulsidi3"
548 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
549 (mult:DI (zero_extend:DI
550 (match_operand:SI 1 "nvptx_register_operand" "R"))
551 (zero_extend:DI
552 (match_operand:SI 2 "nvptx_register_operand" "R"))))]
553 ""
554 "%.\\tmul.wide.u32\\t%0, %1, %2;")
555
556 ;; Shifts
557
;; Shifts of SI/DI values.  The shift count (operand 2) is always SImode and
;; may be a register or an immediate.

;; Left shift, emitted as shl.b<size>.
558 (define_insn "ashl<mode>3"
559 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
560 (ashift:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R")
561 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
562 ""
563 "%.\\tshl.b%T0\\t%0, %1, %2;")
564
;; Arithmetic right shift, emitted as shr.s<size>.
565 (define_insn "ashr<mode>3"
566 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
567 (ashiftrt:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R")
568 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
569 ""
570 "%.\\tshr.s%T0\\t%0, %1, %2;")
571
;; Logical right shift, emitted as shr.u<size>.
572 (define_insn "lshr<mode>3"
573 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
574 (lshiftrt:SDIM (match_operand:SDIM 1 "nvptx_register_operand" "R")
575 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")))]
576 ""
577 "%.\\tshr.u%T0\\t%0, %1, %2;")
578
579 ;; Logical operations
580
;; Bitwise logical operations for BI/HI/SI/DI (note that BImode predicates
;; are included via the BHSDIM iterator).  Operand 2 may be an immediate.

;; Bitwise AND.
581 (define_insn "and<mode>3"
582 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
583 (and:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
584 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
585 ""
586 "%.\\tand.b%T0\\t%0, %1, %2;")
587
;; Bitwise inclusive OR.
588 (define_insn "ior<mode>3"
589 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
590 (ior:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
591 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
592 ""
593 "%.\\tor.b%T0\\t%0, %1, %2;")
594
;; Bitwise exclusive OR.
595 (define_insn "xor<mode>3"
596 [(set (match_operand:BHSDIM 0 "nvptx_register_operand" "=R")
597 (xor:BHSDIM (match_operand:BHSDIM 1 "nvptx_register_operand" "R")
598 (match_operand:BHSDIM 2 "nvptx_nonmemory_operand" "Ri")))]
599 ""
600 "%.\\txor.b%T0\\t%0, %1, %2;")
601
602 ;; Comparisons and branches
603
;; Integer comparison producing a BImode predicate register, emitted as setp.
;; The comparison code (operand 1) is printed through the %c1 modifier.
604 (define_insn "*cmp<mode>"
605 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
606 (match_operator:BI 1 "nvptx_comparison_operator"
607 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
608 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
609 ""
610 "%.\\tsetp%c1\\t%0, %2, %3;")
611
;; Floating-point (SF/DF) comparison producing a BImode predicate register,
;; emitted as setp.  Accepts the wider float comparison code set.
612 (define_insn "*cmp<mode>"
613 [(set (match_operand:BI 0 "nvptx_register_operand" "=R")
614 (match_operator:BI 1 "nvptx_float_comparison_operator"
615 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
616 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
617 ""
618 "%.\\tsetp%c1\\t%0, %2, %3;")
619
;; Unconditional jump to the label in operand 0, emitted as bra.
620 (define_insn "jump"
621 [(set (pc)
622 (label_ref (match_operand 0 "" "")))]
623 ""
624 "%.\\tbra\\t%l0;")
625
;; Conditional branches on a BImode register.  The instruction prefix is
;; produced from operand 0 by the %j / %J modifiers instead of the usual "%.",
;; and both patterns are marked non-predicable.

;; Branch to operand 1 when the predicate register is non-zero.
626 (define_insn "br_true"
627 [(set (pc)
628 (if_then_else (ne (match_operand:BI 0 "nvptx_register_operand" "R")
629 (const_int 0))
630 (label_ref (match_operand 1 "" ""))
631 (pc)))]
632 ""
633 "%j0\\tbra\\t%l1;"
634 [(set_attr "predicable" "false")])
635
;; Branch to operand 1 when the predicate register is zero.
636 (define_insn "br_false"
637 [(set (pc)
638 (if_then_else (eq (match_operand:BI 0 "nvptx_register_operand" "R")
639 (const_int 0))
640 (label_ref (match_operand 1 "" ""))
641 (pc)))]
642 ""
643 "%J0\\tbra\\t%l1;"
644 [(set_attr "predicable" "false")])
645
646 ;; unified conditional branch
;; Same shape as br_true / br_false, but the predicate is wrapped in
;; UNSPEC_BR_UNIFIED and the emitted mnemonic is bra.uni.
647 (define_insn "br_true_uni"
648 [(set (pc) (if_then_else
649 (ne (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
650 UNSPEC_BR_UNIFIED) (const_int 0))
651 (label_ref (match_operand 1 "" "")) (pc)))]
652 ""
653 "%j0\\tbra.uni\\t%l1;"
654 [(set_attr "predicable" "false")])
655
;; Unified branch taken when the predicate register is zero.
656 (define_insn "br_false_uni"
657 [(set (pc) (if_then_else
658 (eq (unspec:BI [(match_operand:BI 0 "nvptx_register_operand" "R")]
659 UNSPEC_BR_UNIFIED) (const_int 0))
660 (label_ref (match_operand 1 "" "")) (pc)))]
661 ""
662 "%J0\\tbra.uni\\t%l1;"
663 [(set_attr "predicable" "false")])
664
;; Compare-and-branch expander for HI/SI/DI operands.  The comparison is
;; handed to nvptx_expand_compare; operands 0-2 are then replaced by the
;; returned rtx and its two sub-expressions before the pattern is emitted.
(define_expand "cbranch<mode>4"
  [(set (pc)
        (if_then_else (match_operator 0 "nvptx_comparison_operator"
                        [(match_operand:HSDIM 1 "nvptx_register_operand" "")
                         (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "")])
                      (label_ref (match_operand 3 "" ""))
                      (pc)))]
  ""
{
  rtx cond = nvptx_expand_compare (operands[0]);

  operands[2] = XEXP (cond, 1);
  operands[1] = XEXP (cond, 0);
  operands[0] = cond;
})
679
;; Compare-and-branch expander for SF/DF operands.  The comparison is handed
;; to nvptx_expand_compare; operands 0-2 are then replaced by the returned rtx
;; and its two sub-expressions before the pattern is emitted.
(define_expand "cbranch<mode>4"
  [(set (pc)
        (if_then_else (match_operator 0 "nvptx_float_comparison_operator"
                        [(match_operand:SDFM 1 "nvptx_register_operand" "")
                         (match_operand:SDFM 2 "nvptx_nonmemory_operand" "")])
                      (label_ref (match_operand 3 "" ""))
                      (pc)))]
  ""
{
  rtx cond = nvptx_expand_compare (operands[0]);

  operands[2] = XEXP (cond, 1);
  operands[1] = XEXP (cond, 0);
  operands[0] = cond;
})
694
;; Compare-and-branch on a BImode register tested against zero with EQ/NE.
;; The preparation statement is empty, so the pattern is emitted unchanged.
695 (define_expand "cbranchbi4"
696 [(set (pc)
697 (if_then_else (match_operator 0 "predicate_operator"
698 [(match_operand:BI 1 "nvptx_register_operand" "")
699 (match_operand:BI 2 "const0_operand" "")])
700 (label_ref (match_operand 3 "" ""))
701 (pc)))]
702 ""
703 "")
704
705 ;; Conditional stores
706
;; Materialise a BImode predicate as an SImode value: selp picks -1 when the
;; predicate is set and 0 otherwise.
;; NOTE(review): this template separates mnemonic and operands with a space
;; and omits spaces after commas, unlike every other template in this part of
;; the file.  Left untouched because it changes emitted text.
707 (define_insn "setcc_from_bi"
708 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
709 (ne:SI (match_operand:BI 1 "nvptx_register_operand" "R")
710 (const_int 0)))]
711 ""
712 "%.\\tselp%t0 %0,-1,0,%1;")
713
;; Conditional select on "predicate != 0": operand 0 = operand 1 ? operand 2
;; : operand 3, emitted as selp with the values in source order.

;; Integer (HI/SI/DI) variant; the values may be registers or immediates.
714 (define_insn "sel_true<mode>"
715 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
716 (if_then_else:HSDIM
717 (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
718 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
719 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
720 ""
721 "%.\\tselp%t0\\t%0, %2, %3, %1;")
722
;; Floating-point (SF/DF) variant.
723 (define_insn "sel_true<mode>"
724 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
725 (if_then_else:SDFM
726 (ne (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
727 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
728 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
729 ""
730 "%.\\tselp%t0\\t%0, %2, %3, %1;")
731
;; Conditional select on "predicate == 0".  The template swaps the value
;; operands (%3 before %2) so the same selp instruction implements the
;; inverted condition.

;; Integer (HI/SI/DI) variant.
732 (define_insn "sel_false<mode>"
733 [(set (match_operand:HSDIM 0 "nvptx_register_operand" "=R")
734 (if_then_else:HSDIM
735 (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
736 (match_operand:HSDIM 2 "nvptx_nonmemory_operand" "Ri")
737 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")))]
738 ""
739 "%.\\tselp%t0\\t%0, %3, %2, %1;")
740
;; Floating-point (SF/DF) variant.
741 (define_insn "sel_false<mode>"
742 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
743 (if_then_else:SDFM
744 (eq (match_operand:BI 1 "nvptx_register_operand" "R") (const_int 0))
745 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
746 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
747 ""
748 "%.\\tselp%t0\\t%0, %3, %2, %1;")
749
;; Comparison whose result is stored directly in an SImode register, emitted
;; as a set instruction (rather than setp into a predicate).

;; Integer (HI/SI/DI) operands.
750 (define_insn "setcc_int<mode>"
751 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
752 (match_operator:SI 1 "nvptx_comparison_operator"
753 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
754 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
755 ""
756 "%.\\tset%t0%c1\\t%0, %2, %3;")
757
;; Floating-point (SF/DF) operands.
758 (define_insn "setcc_int<mode>"
759 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
760 (match_operator:SI 1 "nvptx_float_comparison_operator"
761 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
762 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
763 ""
764 "%.\\tset%t0%c1\\t%0, %2, %3;")
765
;; Comparison whose result is stored in an SFmode register, emitted as a set
;; instruction.

;; Integer (HI/SI/DI) operands.
766 (define_insn "setcc_float<mode>"
767 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
768 (match_operator:SF 1 "nvptx_comparison_operator"
769 [(match_operand:HSDIM 2 "nvptx_register_operand" "R")
770 (match_operand:HSDIM 3 "nvptx_nonmemory_operand" "Ri")]))]
771 ""
772 "%.\\tset%t0%c1\\t%0, %2, %3;")
773
;; Floating-point (SF/DF) operands.
774 (define_insn "setcc_float<mode>"
775 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
776 (match_operator:SF 1 "nvptx_float_comparison_operator"
777 [(match_operand:SDFM 2 "nvptx_register_operand" "R")
778 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")]))]
779 ""
780 "%.\\tset%t0%c1\\t%0, %2, %3;")
781
;; cstore expanders: store a comparison result in an SImode register.  All
;; three have empty preparation statements, so the pattern is emitted as
;; written and must be matched by one of the insns above.

;; BImode input: only "predicate != 0" is accepted (ne_operator).
782 (define_expand "cstorebi4"
783 [(set (match_operand:SI 0 "nvptx_register_operand")
784 (match_operator:SI 1 "ne_operator"
785 [(match_operand:BI 2 "nvptx_register_operand")
786 (match_operand:BI 3 "const0_operand")]))]
787 ""
788 "")
789
;; Integer (HI/SI/DI) comparison.
790 (define_expand "cstore<mode>4"
791 [(set (match_operand:SI 0 "nvptx_register_operand")
792 (match_operator:SI 1 "nvptx_comparison_operator"
793 [(match_operand:HSDIM 2 "nvptx_register_operand")
794 (match_operand:HSDIM 3 "nvptx_nonmemory_operand")]))]
795 ""
796 "")
797
;; Floating-point (SF/DF) comparison.
798 (define_expand "cstore<mode>4"
799 [(set (match_operand:SI 0 "nvptx_register_operand")
800 (match_operator:SI 1 "nvptx_float_comparison_operator"
801 [(match_operand:SDFM 2 "nvptx_register_operand")
802 (match_operand:SDFM 3 "nvptx_nonmemory_operand")]))]
803 ""
804 "")
805
806 ;; Calls
807
;; Call without a return value.  The insn is a PARALLEL validated by
;; call_operation, whose first element is the call through a pointer-mode
;; (:P) callee address.  Output is produced by nvptx_output_call_insn, which
;; receives NULL_RTX in place of a result operand.
808 (define_insn "call_insn_<mode>"
809 [(match_parallel 2 "call_operation"
810 [(call (mem:QI (match_operand:P 0 "call_insn_operand" "Rs"))
811 (match_operand 1))])]
812 ""
813 {
814 return nvptx_output_call_insn (insn, NULL_RTX, operands[0]);
815 })
816
;; Call with a return value stored in register operand 0.  Same PARALLEL
;; shape as call_insn_<mode>; nvptx_output_call_insn receives the result
;; register and the callee (operand 1).
817 (define_insn "call_value_insn_<mode>"
818 [(match_parallel 3 "call_operation"
819 [(set (match_operand 0 "nvptx_register_operand" "=R")
820 (call (mem:QI (match_operand:P 1 "call_insn_operand" "Rs"))
821 (match_operand 2)))])]
822 ""
823 {
824 return nvptx_output_call_insn (insn, operands[0], operands[1]);
825 })
826
;; Expander for a call with no result: everything is delegated to
;; nvptx_expand_call (with NULL_RTX as the result) and the pattern itself is
;; never emitted (DONE).
827 (define_expand "call"
828 [(match_operand 0 "" "")]
829 ""
830 {
831 nvptx_expand_call (NULL_RTX, operands[0]);
832 DONE;
833 })
834
;; Expander for a call with a result: operand 0 is the result and operand 1
;; the callee; both are passed to nvptx_expand_call, and the pattern itself
;; is never emitted (DONE).
835 (define_expand "call_value"
836 [(match_operand 0 "" "")
837 (match_operand 1 "" "")]
838 ""
839 {
840 nvptx_expand_call (operands[0], operands[1]);
841 DONE;
842 })
843
844 ;; Floating point arithmetic.
845
;; SF/DF addition.  Operand 2 may be a register or a floating constant ("F").
846 (define_insn "add<mode>3"
847 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
848 (plus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
849 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
850 ""
851 "%.\\tadd%t0\\t%0, %1, %2;")
852
;; SF/DF subtraction.  Both inputs must be registers.
853 (define_insn "sub<mode>3"
854 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
855 (minus:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
856 (match_operand:SDFM 2 "nvptx_register_operand" "R")))]
857 ""
858 "%.\\tsub%t0\\t%0, %1, %2;")
859
;; SF/DF multiplication.  Operand 2 may be a register or a floating constant.
860 (define_insn "mul<mode>3"
861 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
862 (mult:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
863 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
864 ""
865 "%.\\tmul%t0\\t%0, %1, %2;")
866
;; SF/DF fused multiply-add: operand 0 = fma (operand 1, operand 2,
;; operand 3).  The template inserts the %# modifier between the mnemonic and
;; the type suffix (presumably a rounding-mode qualifier -- produced by the
;; target's operand printer, not visible here).
867 (define_insn "fma<mode>4"
868 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
869 (fma:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
870 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")
871 (match_operand:SDFM 3 "nvptx_nonmemory_operand" "RF")))]
872 ""
873 "%.\\tfma%#%t0\\t%0, %1, %2, %3;")
874
;; SF/DF division, emitted with the %# modifier before the type suffix.
;; The divisor may be a register or a floating constant.
875 (define_insn "div<mode>3"
876 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
877 (div:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
878 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
879 ""
880 "%.\\tdiv%#%t0\\t%0, %1, %2;")
881
;; SF/DF copysign, represented as UNSPEC_COPYSIGN.  Note that the template
;; passes the source operands to the instruction in swapped order
;; (%2 before %1).
882 (define_insn "copysign<mode>3"
883 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
884 (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")
885 (match_operand:SDFM 2 "nvptx_register_operand" "R")]
886 UNSPEC_COPYSIGN))]
887 ""
888 "%.\\tcopysign%t0\\t%0, %2, %1;")
889
;; SF/DF minimum.  Operand 2 may be a register or a floating constant.
890 (define_insn "smin<mode>3"
891 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
892 (smin:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
893 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
894 ""
895 "%.\\tmin%t0\\t%0, %1, %2;")
896
;; SF/DF maximum.  Operand 2 may be a register or a floating constant.
897 (define_insn "smax<mode>3"
898 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
899 (smax:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")
900 (match_operand:SDFM 2 "nvptx_nonmemory_operand" "RF")))]
901 ""
902 "%.\\tmax%t0\\t%0, %1, %2;")
903
;; SF/DF absolute value, register to register.
904 (define_insn "abs<mode>2"
905 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
906 (abs:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
907 ""
908 "%.\\tabs%t0\\t%0, %1;")
909
;; SF/DF negation, register to register.
910 (define_insn "neg<mode>2"
911 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
912 (neg:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
913 ""
914 "%.\\tneg%t0\\t%0, %1;")
915
;; SF/DF square root, emitted with the %# modifier before the type suffix.
916 (define_insn "sqrt<mode>2"
917 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
918 (sqrt:SDFM (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
919 ""
920 "%.\\tsqrt%#%t0\\t%0, %1;")
921
;; Combined SFmode sin/cos, enabled only under flag_unsafe_math_optimizations.
;; Emits two sets: operand 0 receives the UNSPEC_COS of operand 2 and
;; operand 1 the UNSPEC_SIN of operand 2.  Because the first set could
;; overwrite the input, operand 2 is first passed through make_safe_from
;; against operand 0.
922 (define_expand "sincossf3"
923 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
924 (unspec:SF [(match_operand:SF 2 "nvptx_register_operand" "R")]
925 UNSPEC_COS))
926 (set (match_operand:SF 1 "nvptx_register_operand" "=R")
927 (unspec:SF [(match_dup 2)] UNSPEC_SIN))]
928 "flag_unsafe_math_optimizations"
929 {
930 operands[2] = make_safe_from (operands[2], operands[0]);
931 })
932
;; SFmode transcendental approximations.  All four are represented as
;; unspecs, are enabled only under flag_unsafe_math_optimizations, and emit
;; the ".approx" form of the instruction.

;; Sine.
933 (define_insn "sinsf2"
934 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
935 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
936 UNSPEC_SIN))]
937 "flag_unsafe_math_optimizations"
938 "%.\\tsin.approx%t0\\t%0, %1;")
939
;; Cosine.
940 (define_insn "cossf2"
941 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
942 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
943 UNSPEC_COS))]
944 "flag_unsafe_math_optimizations"
945 "%.\\tcos.approx%t0\\t%0, %1;")
946
;; Base-2 logarithm (mnemonic lg2).
947 (define_insn "log2sf2"
948 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
949 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
950 UNSPEC_LOG2))]
951 "flag_unsafe_math_optimizations"
952 "%.\\tlg2.approx%t0\\t%0, %1;")
953
;; Base-2 exponential (mnemonic ex2).
954 (define_insn "exp2sf2"
955 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
956 (unspec:SF [(match_operand:SF 1 "nvptx_register_operand" "R")]
957 UNSPEC_EXP2))]
958 "flag_unsafe_math_optimizations"
959 "%.\\tex2.approx%t0\\t%0, %1;")
960
961 ;; Conversions involving floating point
962
;; Widen SF to DF with cvt; no %# modifier is used for the widening.
963 (define_insn "extendsfdf2"
964 [(set (match_operand:DF 0 "nvptx_register_operand" "=R")
965 (float_extend:DF (match_operand:SF 1 "nvptx_register_operand" "R")))]
966 ""
967 "%.\\tcvt%t0%t1\\t%0, %1;")
968
;; Narrow DF to SF with cvt; the template includes the %# modifier here.
969 (define_insn "truncdfsf2"
970 [(set (match_operand:SF 0 "nvptx_register_operand" "=R")
971 (float_truncate:SF (match_operand:DF 1 "nvptx_register_operand" "R")))]
972 ""
973 "%.\\tcvt%#%t0%t1\\t%0, %1;")
974
;; Integer to floating-point conversions (result SF or DF), all emitted as
;; cvt with the %# modifier.  The source type suffix is .u for the unsigned
;; variants and .s for the signed ones, sized from operand 1.

;; Unsigned SI -> SF/DF.
975 (define_insn "floatunssi<mode>2"
976 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
977 (unsigned_float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
978 ""
979 "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
980
;; Signed SI -> SF/DF.
981 (define_insn "floatsi<mode>2"
982 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
983 (float:SDFM (match_operand:SI 1 "nvptx_register_operand" "R")))]
984 ""
985 "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
986
;; Unsigned DI -> SF/DF.
987 (define_insn "floatunsdi<mode>2"
988 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
989 (unsigned_float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
990 ""
991 "%.\\tcvt%#%t0.u%T1\\t%0, %1;")
992
;; Signed DI -> SF/DF.
993 (define_insn "floatdi<mode>2"
994 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
995 (float:SDFM (match_operand:DI 1 "nvptx_register_operand" "R")))]
996 ""
997 "%.\\tcvt%#%t0.s%T1\\t%0, %1;")
998
;; Floating-point to integer conversions.  All four templates use a fixed
;; ".rzi" modifier on cvt; the destination is typed ".u" (unsigned_fix) or
;; ".s" (fix) followed by "%T0".  SDFM is a mode iterator defined outside
;; this chunk.
;; NOTE(review): "%T0" presumably prints the bit width of operand 0 --
;; confirm in the operand printer.

;; Floating-point operand 1 -> unsigned SImode operand 0.
999 (define_insn "fixuns_trunc<mode>si2"
1000 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1001 (unsigned_fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1002 ""
1003 "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
1004
;; Floating-point operand 1 -> signed SImode operand 0.
1005 (define_insn "fix_trunc<mode>si2"
1006 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1007 (fix:SI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1008 ""
1009 "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
1010
;; Floating-point operand 1 -> unsigned DImode operand 0.
1011 (define_insn "fixuns_trunc<mode>di2"
1012 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
1013 (unsigned_fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1014 ""
1015 "%.\\tcvt.rzi.u%T0%t1\\t%0, %1;")
1016
;; Floating-point operand 1 -> signed DImode operand 0.
1017 (define_insn "fix_trunc<mode>di2"
1018 [(set (match_operand:DI 0 "nvptx_register_operand" "=R")
1019 (fix:DI (match_operand:SDFM 1 "nvptx_register_operand" "R")))]
1020 ""
1021 "%.\\tcvt.rzi.s%T0%t1\\t%0, %1;")
1022
;; Rounding a floating-point value to an integral value in the same
;; floating-point mode.  FPINT iterates over the four UNSPEC_FPINT_* codes.
1023 (define_int_iterator FPINT [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_BTRUNC
1024 UNSPEC_FPINT_CEIL UNSPEC_FPINT_NEARBYINT])
;; Pattern-name stem for each FPINT code.
1025 (define_int_attr fpint_name [(UNSPEC_FPINT_FLOOR "floor")
1026 (UNSPEC_FPINT_BTRUNC "btrunc")
1027 (UNSPEC_FPINT_CEIL "ceil")
1028 (UNSPEC_FPINT_NEARBYINT "nearbyint")])
;; Text placed directly after "cvt" for each FPINT code.  Three codes use a
;; fixed suffix; nearbyint instead uses the "%#" output modifier followed
;; by "i".
1029 (define_int_attr fpint_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
1030 (UNSPEC_FPINT_BTRUNC ".rzi")
1031 (UNSPEC_FPINT_CEIL ".rpi")
1032 (UNSPEC_FPINT_NEARBYINT "%#i")])
1033
;; One insn per (FPINT code, SDFM mode) pair: operand 0 and operand 1 share
;; the same floating-point mode, and the cvt template takes its rounding
;; text from fpint_roundingmode.
1034 (define_insn "<FPINT:fpint_name><SDFM:mode>2"
1035 [(set (match_operand:SDFM 0 "nvptx_register_operand" "=R")
1036 (unspec:SDFM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
1037 FPINT))]
1038 ""
1039 "%.\\tcvt<FPINT:fpint_roundingmode>%t0%t1\\t%0, %1;")
1040
;; Rounding a floating-point value directly to an integer mode.  FPINT2
;; covers only the floor and ceil codes.
1041 (define_int_iterator FPINT2 [UNSPEC_FPINT_FLOOR UNSPEC_FPINT_CEIL])
;; Pattern-name stem for each FPINT2 code.
1042 (define_int_attr fpint2_name [(UNSPEC_FPINT_FLOOR "lfloor")
1043 (UNSPEC_FPINT_CEIL "lceil")])
;; cvt rounding suffix for each FPINT2 code.
1044 (define_int_attr fpint2_roundingmode [(UNSPEC_FPINT_FLOOR ".rmi")
1045 (UNSPEC_FPINT_CEIL ".rpi")])
1046
;; One insn per (FPINT2 code, SDFM source mode, SDIM destination mode)
;; combination.  The destination type in the template is always the signed
;; ".s" form followed by "%T0".
1047 (define_insn "<FPINT2:fpint2_name><SDFM:mode><SDIM:mode>2"
1048 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1049 (unspec:SDIM [(match_operand:SDFM 1 "nvptx_register_operand" "R")]
1050 FPINT2))]
1051 ""
1052 "%.\\tcvt<FPINT2:fpint2_roundingmode>.s%T0%t1\\t%0, %1;")
1053
1054 ;; Miscellaneous
1055
;; No-operation insn: matches (const_int 0) and emits an empty template.
1056 (define_insn "nop"
1057 [(const_int 0)]
1058 ""
1059 "")
1060
;; Matches (const_int 1) and emits a bare "exit;".
1061 (define_insn "exit"
1062 [(const_int 1)]
1063 ""
1064 "exit;")
1065
;; Matches (const_int 2).  Unlike "nop" this one does emit text: a braced
;; scope that declares two .u32 registers and moves one into the other.
;; "%%" in the template produces a literal "%" in the output.
1066 (define_insn "fake_nop"
1067 [(const_int 2)]
1068 ""
1069 "{
1070 .reg .u32 %%nop_src;
1071 .reg .u32 %%nop_dst;
1072 mov.u32 %%nop_dst, %%nop_src;
1073 }")
1074
;; Function return.  The output text is produced by nvptx_output_return ()
;; rather than by a fixed template, and the insn is marked as not
;; predicable.
1075 (define_insn "return"
1076 [(return)]
1077 ""
1078 {
1079 return nvptx_output_return ();
1080 }
1081 [(set_attr "predicable" "false")])
1082
;; Function epilogue.  The RTL template is a placeholder; the C body emits
;; everything itself and finishes with DONE.
1083 (define_expand "epilogue"
1084 [(clobber (const_int 0))]
1085 ""
1086 {
;; With soft stacks, first emit set_softstack on the Pmode register numbered
;; SOFTSTACK_PREV_REGNUM.
1087 if (TARGET_SOFT_STACK)
1088 emit_insn (gen_set_softstack (Pmode, gen_rtx_REG (Pmode,
1089 SOFTSTACK_PREV_REGNUM)));
;; The return jump is emitted unconditionally.
1090 emit_jump_insn (gen_return ());
1091 DONE;
1092 })
1093
;; Nonlocal goto is not supported on this target.  The expander reports
;; that with sorry () and then emits a nop so that expansion still produces
;; an insn.  The four operands are accepted but unused.
;; The diagnostic text carries no trailing period, following the GCC
;; convention for diagnostic messages.
1094 (define_expand "nonlocal_goto"
1095 [(match_operand 0 "" "")
1096 (match_operand 1 "" "")
1097 (match_operand 2 "" "")
1098 (match_operand 3 "" "")]
1099 ""
1100 {
1101 sorry ("target cannot support nonlocal goto");
1102 emit_insn (gen_nop ());
1103 DONE;
1104 })
1105
;; Receiver side of a nonlocal goto: likewise unsupported.  Only the
;; diagnostic is issued here; there is no DONE, so the (const_int 0)
;; template of this expander is what gets emitted.
1106 (define_expand "nonlocal_goto_receiver"
1107 [(const_int 0)]
1108 ""
1109 {
1110 sorry ("target cannot support nonlocal goto");
1111 })
1112
;; Dynamic stack allocation.  Operand 0 receives the address of the new
;; block and operand 1 is the number of bytes requested; both must be
;; registers.  Only the soft-stack configuration can implement this; any
;; other configuration gets a sorry () diagnostic and a nop.
;; The diagnostic text carries no trailing period, following the GCC
;; convention for diagnostic messages.
1113 (define_expand "allocate_stack"
1114 [(match_operand 0 "nvptx_register_operand")
1115 (match_operand 1 "nvptx_register_operand")]
1116 ""
1117 {
1118 if (TARGET_SOFT_STACK)
1119 {
;; Lower the stack pointer by the requested size, publish the new value
;; through set_softstack, then hand back virtual_stack_dynamic_rtx.
1120 emit_move_insn (stack_pointer_rtx,
1121 gen_rtx_MINUS (Pmode, stack_pointer_rtx, operands[1]));
1122 emit_insn (gen_set_softstack (Pmode, stack_pointer_rtx));
1123 emit_move_insn (operands[0], virtual_stack_dynamic_rtx);
1124 DONE;
1125 }
1126 /* The ptx documentation specifies an alloca intrinsic (for 32 bit
1127 only) but notes it is not implemented. The assembler emits a
1128 confused error message. Issue a blunt one now instead. */
1129 sorry ("target cannot support alloca");
1130 emit_insn (gen_nop ());
1131 DONE;
1132 })
1133
;; Record register operand 0 as the current soft-stack value.  Only
;; available under TARGET_SOFT_STACK.  The "@" prefix makes the generator
;; take the mode as an argument, which is how the expanders in this file
;; call it: gen_set_softstack (Pmode, reg).  The output text comes from
;; nvptx_output_set_softstack, given the register number of operand 0.
1134 (define_insn "@set_softstack_<mode>"
1135 [(unspec [(match_operand:P 0 "nvptx_register_operand" "R")]
1136 UNSPEC_SET_SOFTSTACK)]
1137 "TARGET_SOFT_STACK"
1138 {
1139 return nvptx_output_set_softstack (REGNO (operands[0]));
1140 })
1141
;; Restore the stack at block exit.  With soft stacks, operand 1 is copied
;; into operand 0 and set_softstack is emitted on operand 0; otherwise
;; nothing is emitted.  DONE is reached on both paths, so the expander's
;; own template is never used.
1142 (define_expand "restore_stack_block"
1143 [(match_operand 0 "register_operand" "")
1144 (match_operand 1 "register_operand" "")]
1145 ""
1146 {
1147 if (TARGET_SOFT_STACK)
1148 {
1149 emit_move_insn (operands[0], operands[1]);
1150 emit_insn (gen_set_softstack (Pmode, operands[0]));
1151 }
1152 DONE;
1153 })
1154
;; Restore the stack at function level: deliberately expands to nothing.
;; Both operands are accepted and ignored.
1155 (define_expand "restore_stack_function"
1156 [(match_operand 0 "register_operand" "")
1157 (match_operand 1 "register_operand" "")]
1158 ""
1159 {
1160 DONE;
1161 })
1162
;; Unconditional trap.  The template emits "trap;" followed by "exit;".
1163 (define_insn "trap"
1164 [(trap_if (const_int 1) (const_int 0))]
1165 ""
1166 "trap; exit;")
1167
;; Trap when the BImode register operand 0 is non-zero.  Both the trap and
;; the exit are prefixed with "%j0"; the insn itself is not predicable.
;; NOTE(review): "%j0" presumably prints operand 0 as a guard predicate --
;; confirm in the operand printer.
1168 (define_insn "trap_if_true"
1169 [(trap_if (ne (match_operand:BI 0 "nvptx_register_operand" "R")
1170 (const_int 0))
1171 (const_int 0))]
1172 ""
1173 "%j0 trap; %j0 exit;"
1174 [(set_attr "predicable" "false")])
1175
;; Trap when the BImode register operand 0 is zero.  Same shape as
;; trap_if_true but using the "%J0" modifier.
;; NOTE(review): "%J0" is presumably the negated form of "%j0" -- confirm.
1176 (define_insn "trap_if_false"
1177 [(trap_if (eq (match_operand:BI 0 "nvptx_register_operand" "R")
1178 (const_int 0))
1179 (const_int 0))]
1180 ""
1181 "%J0 trap; %J0 exit;"
1182 [(set_attr "predicable" "false")])
1183
;; Conditional trap.  Operand 0 is the comparison operator applied to
;; operands 1 and 2; operand 3 must satisfy const0_operand.  The comparison
;; is lowered by nvptx_expand_compare and its result drives trap_if_true.
1184 (define_expand "ctrap<mode>4"
1185 [(trap_if (match_operator 0 "nvptx_comparison_operator"
1186 [(match_operand:SDIM 1 "nvptx_register_operand")
1187 (match_operand:SDIM 2 "nvptx_nonmemory_operand")])
1188 (match_operand 3 "const0_operand"))]
1189 ""
1190 {
1191 rtx trap_pred = nvptx_expand_compare (operands[0]);
1192 emit_insn (gen_trap_if_true (trap_pred));
1193 DONE;
1194 })
1195
;; Load the size of one OpenACC dimension into SImode operand 0.  Operand 1
;; is a constant that indexes the template table below directly, so it has
;; to be a valid index into that table.  "%%" yields a literal "%".
1196 (define_insn "oacc_dim_size"
1197 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1198 (unspec:SI [(match_operand:SI 1 "const_int_operand" "")]
1199 UNSPEC_DIM_SIZE))]
1200 ""
1201 {
1202 static const char *const dim_size_templates[] =
1203 { /* Must match oacc_loop_levels ordering. */
1204 "%.\\tmov.u32\\t%0, %%nctaid.x;", /* gang */
1205 "%.\\tmov.u32\\t%0, %%ntid.y;", /* worker */
1206 "%.\\tmov.u32\\t%0, %%ntid.x;", /* vector */
1207 };
1208 return dim_size_templates[INTVAL (operands[1])];
1209 })
1210
;; Load the position within one OpenACC dimension into SImode operand 0.
;; Operand 1 is a constant that indexes the template table below directly.
;; This uses unspec_volatile (UNSPECV_DIM_POS), unlike oacc_dim_size.
1211 (define_insn "oacc_dim_pos"
1212 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1213 (unspec_volatile:SI [(match_operand:SI 1 "const_int_operand" "")]
1214 UNSPECV_DIM_POS))]
1215 ""
1216 {
1217 static const char *const dim_pos_templates[] =
1218 { /* Must match oacc_loop_levels ordering. */
1219 "%.\\tmov.u32\\t%0, %%ctaid.x;", /* gang */
1220 "%.\\tmov.u32\\t%0, %%tid.y;", /* worker */
1221 "%.\\tmov.u32\\t%0, %%tid.x;", /* vector */
1222 };
1223 return dim_pos_templates[INTVAL (operands[1])];
1224 })
1225
;; Fork/join marker insns.  Each takes one constant operand wrapped in an
;; unspec_volatile and is not predicable.  Every template begins with "//",
;; so the emitted text is only an annotation carrying the operand value.
;; NOTE(review): presumably these markers are consumed by a later target
;; pass; that code is not visible in this chunk.

;; Marker with UNSPECV_FORK.
1226 (define_insn "nvptx_fork"
1227 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1228 UNSPECV_FORK)]
1229 ""
1230 "// fork %0;"
1231 [(set_attr "predicable" "false")])
1232
;; Marker with UNSPECV_FORKED.
1233 (define_insn "nvptx_forked"
1234 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1235 UNSPECV_FORKED)]
1236 ""
1237 "// forked %0;"
1238 [(set_attr "predicable" "false")])
1239
;; Marker with UNSPECV_JOINING.
1240 (define_insn "nvptx_joining"
1241 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1242 UNSPECV_JOINING)]
1243 ""
1244 "// joining %0;"
1245 [(set_attr "predicable" "false")])
1246
;; Marker with UNSPECV_JOIN.
1247 (define_insn "nvptx_join"
1248 [(unspec_volatile:SI [(match_operand:SI 0 "const_int_operand" "")]
1249 UNSPECV_JOIN)]
1250 ""
1251 "// join %0;"
1252 [(set_attr "predicable" "false")])
1253
;; OpenACC fork.  Operand 1 is copied into operand 0 unless operand 0 is
;; const0_rtx, then nvptx_expand_oacc_fork is called with the integer value
;; of operand 2.  The body always ends in DONE, so the RTL template above
;; it only declares the operands.
1254 (define_expand "oacc_fork"
1255 [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
1256 (match_operand:SI 1 "general_operand" ""))
1257 (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
1258 UNSPECV_FORKED)]
1259 ""
1260 {
1261 if (operands[0] != const0_rtx)
1262 emit_move_insn (operands[0], operands[1]);
1263 nvptx_expand_oacc_fork (INTVAL (operands[2]));
1264 DONE;
1265 })
1266
;; OpenACC join.  Same shape as oacc_fork, but calls
;; nvptx_expand_oacc_join with the integer value of operand 2.
1267 (define_expand "oacc_join"
1268 [(set (match_operand:SI 0 "nvptx_nonmemory_operand" "")
1269 (match_operand:SI 1 "general_operand" ""))
1270 (unspec_volatile:SI [(match_operand:SI 2 "const_int_operand" "")]
1271 UNSPECV_JOIN)]
1272 ""
1273 {
1274 if (operands[0] != const0_rtx)
1275 emit_move_insn (operands[0], operands[1]);
1276 nvptx_expand_oacc_join (INTVAL (operands[2]));
1277 DONE;
1278 })
1279
1280 ;; only 32-bit shuffles exist.
;; Operand 0 receives the shuffled value of register operand 1.  Operand 2
;; (register or immediate) is passed through as the third shfl argument,
;; and the constant operand 3 selects the shuffle variant via "%S3".  The
;; template hard-codes the ".b32" type and a final argument of 31.
;; NOTE(review): BITS is a mode iterator defined outside this chunk, and
;; "%S3" is presumably printed as the shuffle-kind suffix -- confirm.
1281 (define_insn "nvptx_shuffle<mode>"
1282 [(set (match_operand:BITS 0 "nvptx_register_operand" "=R")
1283 (unspec:BITS
1284 [(match_operand:BITS 1 "nvptx_register_operand" "R")
1285 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")
1286 (match_operand:SI 3 "const_int_operand" "n")]
1287 UNSPEC_SHUFFLE))]
1288 ""
1289 "%.\\tshfl%S3.b32\\t%0, %1, %2, 31;")
1290
;; Ballot vote: the BImode predicate register operand 1 is combined into
;; the SImode register operand 0 by a single vote.ballot.b32.
1291 (define_insn "nvptx_vote_ballot"
1292 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1293 (unspec:SI [(match_operand:BI 1 "nvptx_register_operand" "R")]
1294 UNSPEC_VOTE_BALLOT))]
1295 ""
1296 "%.\\tvote.ballot.b32\\t%0, %1;")
1297
1298 ;; Patterns for OpenMP SIMD-via-SIMT lowering
1299
;; Insn form of SIMT region entry, parameterized on the pointer mode via
;; "@".  Operand 0 is the result register; operands 1 and 2 are the two
;; values checked and recorded by the omp_simt_enter expander (stack size
;; and alignment).  The output text comes from nvptx_output_simt_enter.
1300 (define_insn "@omp_simt_enter_<mode>"
1301 [(set (match_operand:P 0 "nvptx_register_operand" "=R")
1302 (unspec_volatile:P [(match_operand:P 1 "nvptx_nonmemory_operand" "Ri")
1303 (match_operand:P 2 "nvptx_nonmemory_operand" "Ri")]
1304 UNSPECV_SIMT_ENTER))]
1305 ""
1306 {
1307 return nvptx_output_simt_enter (operands[0], operands[1], operands[2]);
1308 })
1309
;; Expander for SIMT region entry.  Before emitting the Pmode insn above it
;; updates the per-function bookkeeping in cfun->machine.
1310 (define_expand "omp_simt_enter"
1311 [(match_operand 0 "nvptx_register_operand" "=R")
1312 (match_operand 1 "nvptx_nonmemory_operand" "Ri")
1313 (match_operand 2 "const_int_operand" "n")]
1314 ""
1315 {
;; A size that is not a compile-time constant is recorded as
;; HOST_WIDE_INT_M1U; otherwise keep the maximum size seen so far.
1316 if (!CONST_INT_P (operands[1]))
1317 cfun->machine->simt_stack_size = HOST_WIDE_INT_M1U;
1318 else
1319 cfun->machine->simt_stack_size = MAX (UINTVAL (operands[1]),
1320 cfun->machine->simt_stack_size);
;; The alignment is always constant; keep the maximum seen so far.
1321 cfun->machine->simt_stack_align = MAX (UINTVAL (operands[2]),
1322 cfun->machine->simt_stack_align);
1323 cfun->machine->has_simtreg = true;
1324 emit_insn (gen_omp_simt_enter (Pmode, operands[0], operands[1], operands[2]));
1325 DONE;
1326 })
1327
;; Expander for SIMT region exit: forwards register operand 0 to the
;; Pmode variant of the insn defined below.
1328 (define_expand "omp_simt_exit"
1329 [(match_operand 0 "nvptx_register_operand" "R")]
1330 ""
1331 {
1332 emit_insn (gen_omp_simt_exit (Pmode, operands[0]));
1333 DONE;
1334 })
1335
;; Insn form of SIMT region exit, parameterized on the pointer mode via
;; "@".  The output text comes from nvptx_output_simt_exit.
1336 (define_insn "@omp_simt_exit_<mode>"
1337 [(unspec_volatile [(match_operand:P 0 "nvptx_register_operand" "R")]
1338 UNSPECV_SIMT_EXIT)]
1339 ""
1340 {
1341 return nvptx_output_simt_exit (operands[0]);
1342 })
1343
1344 ;; Implement IFN_GOMP_SIMT_LANE: set operand 0 to lane index
;; The value is read from the %laneid special register with a 32-bit move;
;; "%%" in the template produces the literal "%".
1345 (define_insn "omp_simt_lane"
1346 [(set (match_operand:SI 0 "nvptx_register_operand" "")
1347 (unspec:SI [(const_int 0)] UNSPEC_LANEID))]
1348 ""
1349 "%.\\tmov.u32\\t%0, %%laneid;")
1350
1351 ;; Implement IFN_GOMP_SIMT_ORDERED: copy operand 1 to operand 0 and
1352 ;; place a compiler barrier to disallow unrolling/peeling the containing loop
;; The barrier is the nvptx_nounroll insn, emitted right after the move.
1353 (define_expand "omp_simt_ordered"
1354 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1355 (match_operand:SI 1 "nvptx_register_operand" "R")]
1356 ""
1357 {
1358 emit_move_insn (operands[0], operands[1]);
1359 emit_insn (gen_nvptx_nounroll ());
1360 DONE;
1361 })
1362
1363 ;; Implement IFN_GOMP_SIMT_XCHG_BFLY: perform a "butterfly" exchange
1364 ;; across lanes
;; Operands 0 and 1 carry no mode in the pattern; the insn sequence is
;; built by nvptx_gen_shuffle with kind SHUFFLE_BFLY and operand 2 as the
;; third argument.
1365 (define_expand "omp_simt_xchg_bfly"
1366 [(match_operand 0 "nvptx_register_operand" "=R")
1367 (match_operand 1 "nvptx_register_operand" "R")
1368 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
1369 ""
1370 {
1371 emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
1372 SHUFFLE_BFLY));
1373 DONE;
1374 })
1375
1376 ;; Implement IFN_GOMP_SIMT_XCHG_IDX: broadcast value in operand 1
1377 ;; from lane given by index in operand 2 to operand 0 in all lanes
;; Same structure as omp_simt_xchg_bfly, but with kind SHUFFLE_IDX.
1378 (define_expand "omp_simt_xchg_idx"
1379 [(match_operand 0 "nvptx_register_operand" "=R")
1380 (match_operand 1 "nvptx_register_operand" "R")
1381 (match_operand:SI 2 "nvptx_nonmemory_operand" "Ri")]
1382 ""
1383 {
1384 emit_insn (nvptx_gen_shuffle (operands[0], operands[1], operands[2],
1385 SHUFFLE_IDX));
1386 DONE;
1387 })
1388
1389 ;; Implement IFN_GOMP_SIMT_VOTE_ANY:
1390 ;; set operand 0 to zero iff all lanes supply zero in operand 1
;; Operand 1 is first reduced to a BImode "is non-zero" predicate in a fresh
;; pseudo; the ballot of that predicate is written straight to operand 0.
1391 (define_expand "omp_simt_vote_any"
1392 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1393 (match_operand:SI 1 "nvptx_register_operand" "R")]
1394 ""
1395 {
1396 rtx nonzero_p = gen_reg_rtx (BImode);
1397 emit_move_insn (nonzero_p, gen_rtx_NE (BImode, operands[1], const0_rtx));
1398 emit_insn (gen_nvptx_vote_ballot (operands[0], nonzero_p));
1399 DONE;
1400 })
1401
1402 ;; Implement IFN_GOMP_SIMT_LAST_LANE:
1403 ;; set operand 0 to the lowest lane index that passed non-zero in operand 1
;; Three steps: reduce operand 1 to a BImode "is non-zero" predicate, take
;; the ballot of that predicate into a temporary, then count the trailing
;; zeros of the ballot into operand 0.
1404 (define_expand "omp_simt_last_lane"
1405 [(match_operand:SI 0 "nvptx_register_operand" "=R")
1406 (match_operand:SI 1 "nvptx_register_operand" "R")]
1407 ""
1408 {
1409 rtx nonzero_p = gen_reg_rtx (BImode);
1410 rtx ballot = gen_reg_rtx (SImode);
1411 emit_move_insn (nonzero_p, gen_rtx_NE (BImode, operands[1], const0_rtx));
1412 emit_insn (gen_nvptx_vote_ballot (ballot, nonzero_p));
1413 emit_insn (gen_ctzsi2 (operands[0], ballot));
1414 DONE;
1415 })
1416
1417 ;; extract parts of a 64 bit object into 2 32-bit ints
;; Operand 2 is the source; operands 0 and 1 are the two SImode results,
;; tagged (const_int 0) and (const_int 1) in the RTL.  One mov.b64 with a
;; braced destination pair performs the split.
;; NOTE(review): BITD is a mode iterator defined outside this chunk.
1418 (define_insn "unpack<mode>si2"
1419 [(set (match_operand:SI 0 "nvptx_register_operand" "=R")
1420 (unspec:SI [(match_operand:BITD 2 "nvptx_register_operand" "R")
1421 (const_int 0)] UNSPEC_BIT_CONV))
1422 (set (match_operand:SI 1 "nvptx_register_operand" "=R")
1423 (unspec:SI [(match_dup 2) (const_int 1)] UNSPEC_BIT_CONV))]
1424 ""
1425 "%.\\tmov.b64\\t{%0,%1}, %2;")
1426
1427 ;; pack 2 32-bit ints into a 64 bit object
;; The inverse of the unpack pattern: operands 1 and 2 are the two SImode
;; sources, given as a braced pair to one mov.b64 that writes operand 0.
1428 (define_insn "packsi<mode>2"
1429 [(set (match_operand:BITD 0 "nvptx_register_operand" "=R")
1430 (unspec:BITD [(match_operand:SI 1 "nvptx_register_operand" "R")
1431 (match_operand:SI 2 "nvptx_register_operand" "R")]
1432 UNSPEC_BIT_CONV))]
1433 ""
1434 "%.\\tmov.b64\\t%0, {%1,%2};")
1435
1436 ;; Atomic insns.
1437
;; Compare-and-swap expander.  The hardware operation is emitted through
;; the _1 insn, which only yields the old memory value (operand 1).  The
;; boolean success result (operand 0) is then derived by comparing that old
;; value with the expected value (operand 3).  Operands 5 (is_weak) and 7
;; (failure model) are not used; operand 6 is passed on as the model.
1438 (define_expand "atomic_compare_and_swap<mode>"
1439 [(match_operand:SI 0 "nvptx_register_operand") ;; bool success output
1440 (match_operand:SDIM 1 "nvptx_register_operand") ;; oldval output
1441 (match_operand:SDIM 2 "memory_operand") ;; memory
1442 (match_operand:SDIM 3 "nvptx_register_operand") ;; expected input
1443 (match_operand:SDIM 4 "nvptx_register_operand") ;; newval input
1444 (match_operand:SI 5 "const_int_operand") ;; is_weak
1445 (match_operand:SI 6 "const_int_operand") ;; success model
1446 (match_operand:SI 7 "const_int_operand")] ;; failure model
1447 ""
1448 {
1449 emit_insn (gen_atomic_compare_and_swap<mode>_1
1450 (operands[1], operands[2], operands[3], operands[4], operands[6]));
1451
1452 rtx swapped_p = gen_reg_rtx (BImode);
1453 emit_move_insn (swapped_p, gen_rtx_EQ (BImode, operands[1], operands[3]));
1454 emit_insn (gen_sel_truesi (operands[0], swapped_p, const1_rtx, const0_rtx));
1455 DONE;
1456 })
1457
;; The compare-and-swap instruction itself.  Operand 0 receives the result
;; of atom.cas on memory operand 1 with compare value operand 2 and new
;; value operand 3; operand 4 (the memory model) does not appear in the
;; template.  The second set marks the memory as written by an
;; unspec_volatile.  The insn carries the "atomic" attribute.
;; NOTE(review): "%A1" presumably prints the address-space qualifier of the
;; memory operand -- confirm in the operand printer.
1458 (define_insn "atomic_compare_and_swap<mode>_1"
1459 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1460 (unspec_volatile:SDIM
1461 [(match_operand:SDIM 1 "memory_operand" "+m")
1462 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri")
1463 (match_operand:SDIM 3 "nvptx_nonmemory_operand" "Ri")
1464 (match_operand:SI 4 "const_int_operand")]
1465 UNSPECV_CAS))
1466 (set (match_dup 1)
1467 (unspec_volatile:SDIM [(const_int 0)] UNSPECV_CAS))]
1468 ""
1469 "%.\\tatom%A1.cas.b%T0\\t%0, %1, %2, %3;"
1470 [(set_attr "atomic" "true")])
1471
;; Atomic exchange: operand 0 receives the result of atom.exch on memory
;; operand 1, and operand 2 is stored there.  Operand 3 (the memory model)
;; does not appear in the template.  Carries the "atomic" attribute.
1472 (define_insn "atomic_exchange<mode>"
1473 [(set (match_operand:SDIM 0 "nvptx_register_operand" "=R") ;; output
1474 (unspec_volatile:SDIM
1475 [(match_operand:SDIM 1 "memory_operand" "+m") ;; memory
1476 (match_operand:SI 3 "const_int_operand")] ;; model
1477 UNSPECV_XCHG))
1478 (set (match_dup 1)
1479 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))] ;; input
1480 ""
1481 "%.\\tatom%A1.exch.b%T0\\t%0, %1, %2;"
1482 [(set_attr "atomic" "true")])
1483
;; Atomic fetch-and-add on integers.  Memory operand 1 is updated with the
;; sum of itself and operand 2, and operand 0 receives the value returned
;; by atom.add.  Operand 3 (the memory model) does not appear in the
;; template.  Note that the operand numbering puts the memory at 1 and the
;; register result at 0 even though the memory set is listed first.
1484 (define_insn "atomic_fetch_add<mode>"
1485 [(set (match_operand:SDIM 1 "memory_operand" "+m")
1486 (unspec_volatile:SDIM
1487 [(plus:SDIM (match_dup 1)
1488 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
1489 (match_operand:SI 3 "const_int_operand")] ;; model
1490 UNSPECV_LOCK))
1491 (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1492 (match_dup 1))]
1493 ""
1494 "%.\\tatom%A1.add%t0\\t%0, %1, %2;"
1495 [(set_attr "atomic" "true")])
1496
;; Atomic fetch-and-add on SFmode.  Identical in structure to the integer
;; pattern; the addend constraint is "RF" instead of "Ri".
1497 (define_insn "atomic_fetch_addsf"
1498 [(set (match_operand:SF 1 "memory_operand" "+m")
1499 (unspec_volatile:SF
1500 [(plus:SF (match_dup 1)
1501 (match_operand:SF 2 "nvptx_nonmemory_operand" "RF"))
1502 (match_operand:SI 3 "const_int_operand")] ;; model
1503 UNSPECV_LOCK))
1504 (set (match_operand:SF 0 "nvptx_register_operand" "=R")
1505 (match_dup 1))]
1506 ""
1507 "%.\\tatom%A1.add%t0\\t%0, %1, %2;"
1508 [(set_attr "atomic" "true")])
1509
;; Bitwise RTL codes that have an atomic form, and the mnemonic fragment
;; used for each (note that ior maps to "or").
1510 (define_code_iterator any_logic [and ior xor])
1511 (define_code_attr logic [(and "and") (ior "or") (xor "xor")])
1512
;; Atomic fetch-and-{and,or,xor}.  Same operand layout as the fetch-add
;; patterns: memory is operand 1, the register result operand 0, the other
;; input operand 2, and the memory model operand 3 (not in the template).
;; The insn condition enables SImode unconditionally and any other SDIM
;; mode only under TARGET_SM35.
1513 (define_insn "atomic_fetch_<logic><mode>"
1514 [(set (match_operand:SDIM 1 "memory_operand" "+m")
1515 (unspec_volatile:SDIM
1516 [(any_logic:SDIM (match_dup 1)
1517 (match_operand:SDIM 2 "nvptx_nonmemory_operand" "Ri"))
1518 (match_operand:SI 3 "const_int_operand")] ;; model
1519 UNSPECV_LOCK))
1520 (set (match_operand:SDIM 0 "nvptx_register_operand" "=R")
1521 (match_dup 1))]
1522 "<MODE>mode == SImode || TARGET_SM35"
1523 "%.\\tatom%A1.b%T0.<logic>\\t%0, %1, %2;"
1524 [(set_attr "atomic" "true")])
1525
;; Barrier synchronization.  Operand 0 (register or immediate) is always
;; printed as the first bar.sync argument.  The constant operand 1 is
;; printed as a second argument only when it is non-zero.  Not predicable.
1526 (define_insn "nvptx_barsync"
1527 [(unspec_volatile [(match_operand:SI 0 "nvptx_nonmemory_operand" "Ri")
1528 (match_operand:SI 1 "const_int_operand")]
1529 UNSPECV_BARSYNC)]
1530 ""
1531 {
1532 if (INTVAL (operands[1]) != 0)
1533 return "\\tbar.sync\\t%0, %1;";
1534 else
1535 return "\\tbar.sync\\t%0;";
1536 }
1537 [(set_attr "predicable" "false")])
1538
;; Full memory barrier expander.  Operand 0 is not supplied by the caller:
;; the body creates a volatile BLKmode MEM whose address is a Pmode scratch,
;; and the template then matches the *memory_barrier insn below.
1539 (define_expand "memory_barrier"
1540 [(set (match_dup 0)
1541 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
1542 ""
1543 {
1544 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
1545 MEM_VOLATILE_P (operands[0]) = 1;
1546 })
1547
1548 ;; Ptx defines the memory barriers membar.cta, membar.gl and membar.sys
1549 ;; (corresponding to cuda functions threadfence_block, threadfence and
1550 ;; threadfence_system). For the insn memory_barrier we use membar.sys. This
1551 ;; may be overconservative, but before using membar.gl instead we'll need to
1552 ;; explain in detail why it's safe to use. For now, use membar.sys.
;; The insn has no operand in its output text and is not predicable.
1553 (define_insn "*memory_barrier"
1554 [(set (match_operand:BLK 0 "" "")
1555 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR))]
1556 ""
1557 "\\tmembar.sys;"
1558 [(set_attr "predicable" "false")])
1559
;; CTA-level memory barrier expander.  Built exactly like memory_barrier
;; (a volatile BLKmode MEM on a Pmode scratch address as operand 0), but
;; tagged UNSPECV_MEMBAR_CTA so it matches the insn below.
1560 (define_expand "nvptx_membar_cta"
1561 [(set (match_dup 0)
1562 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
1563 ""
1564 {
1565 operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode));
1566 MEM_VOLATILE_P (operands[0]) = 1;
1567 })
1568
;; Emits membar.cta; no operand appears in the output and the insn is not
;; predicable.
1569 (define_insn "*nvptx_membar_cta"
1570 [(set (match_operand:BLK 0 "" "")
1571 (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))]
1572 ""
1573 "\\tmembar.cta;"
1574 [(set_attr "predicable" "false")])
1575
;; Emits a .pragma "nounroll" directive.  Takes no operands, is volatile
;; (unspec_volatile) and not predicable.  Used by the omp_simt_ordered
;; expander in this file.
1576 (define_insn "nvptx_nounroll"
1577 [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)]
1578 ""
1579 "\\t.pragma \\\"nounroll\\\";"
1580 [(set_attr "predicable" "false")])
1581
;; Sets DImode operand 0 from an unspec_volatile of the constant operand 1.
;; The output text is produced by nvptx_output_red_partition from both
;; operands.  Not predicable.
;; NOTE(review): the destination predicate is nonimmediate_operand while
;; the constraint is "=R"; what the value denotes is defined by the output
;; function, which is not visible in this chunk.
1582 (define_insn "nvptx_red_partition"
1583 [(set (match_operand:DI 0 "nonimmediate_operand" "=R")
1584 (unspec_volatile:DI [(match_operand:DI 1 "const_int_operand")]
1585 UNSPECV_RED_PART))]
1586 ""
1587 {
1588 return nvptx_output_red_partition (operands[0], operands[1]);
1589 }
1590 [(set_attr "predicable" "false")])