1 /* Target code for NVPTX.
2 Copyright (C) 2014-2015 Free Software Foundation, Inc.
3 Contributed by Bernd Schmidt <bernds@codesourcery.com>
5 This file is part of GCC.
7 GCC is free software; you can redistribute it and/or modify it
8 under the terms of the GNU General Public License as published
9 by the Free Software Foundation; either version 3, or (at your
10 option) any later version.
12 GCC is distributed in the hope that it will be useful, but WITHOUT
13 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
14 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
15 License for more details.
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
24 #include "coretypes.h"
30 #include "insn-flags.h"
32 #include "insn-attr.h"
33 #include "insn-codes.h"
34 #include "hard-reg-set.h"
37 #include "insn-config.h"
52 #include "tm-constrs.h"
53 #include "langhooks.h"
56 #include "target-def.h"
57 #include "diagnostic.h"
59 #include "basic-block.h"
61 #include "stor-layout.h"
65 /* Record the function decls we've written, and the libfuncs and function
66 decls corresponding to them. */
67 static std::stringstream func_decls
;
69 struct declared_libfunc_hasher
: ggc_cache_hasher
<rtx
>
71 static hashval_t
hash (rtx x
) { return htab_hash_pointer (x
); }
72 static bool equal (rtx a
, rtx b
) { return a
== b
; }
76 hash_table
<declared_libfunc_hasher
> *declared_libfuncs_htab
;
78 struct tree_hasher
: ggc_cache_hasher
<tree
>
80 static hashval_t
hash (tree t
) { return htab_hash_pointer (t
); }
81 static bool equal (tree a
, tree b
) { return a
== b
; }
84 static GTY((cache
)) hash_table
<tree_hasher
> *declared_fndecls_htab
;
85 static GTY((cache
)) hash_table
<tree_hasher
> *needed_fndecls_htab
;
87 /* Allocate a new, cleared machine_function structure. */
89 static struct machine_function
*
90 nvptx_init_machine_status (void)
92 struct machine_function
*p
= ggc_cleared_alloc
<machine_function
> ();
93 p
->ret_reg_mode
= VOIDmode
;
97 /* Implement TARGET_OPTION_OVERRIDE. */
100 nvptx_option_override (void)
102 init_machine_status
= nvptx_init_machine_status
;
103 /* Gives us a predictable order, which we need especially for variables. */
104 flag_toplevel_reorder
= 1;
105 /* Assumes that it will see only hard registers. */
106 flag_var_tracking
= 0;
107 write_symbols
= NO_DEBUG
;
108 debug_info_level
= DINFO_LEVEL_NONE
;
110 declared_fndecls_htab
= hash_table
<tree_hasher
>::create_ggc (17);
111 needed_fndecls_htab
= hash_table
<tree_hasher
>::create_ggc (17);
112 declared_libfuncs_htab
113 = hash_table
<declared_libfunc_hasher
>::create_ggc (17);
116 /* Return the mode to be used when declaring a ptx object for OBJ.
117 For objects with subparts such as complex modes this is the mode
121 nvptx_underlying_object_mode (rtx obj
)
123 if (GET_CODE (obj
) == SUBREG
)
124 obj
= SUBREG_REG (obj
);
125 machine_mode mode
= GET_MODE (obj
);
128 if (COMPLEX_MODE_P (mode
))
129 return GET_MODE_INNER (mode
);
133 /* Return a ptx type for MODE. If PROMOTE, then use .u32 for QImode to
134 deal with ptx idiosyncrasies. */
137 nvptx_ptx_type_from_mode (machine_mode mode
, bool promote
)
167 /* Return the number of pieces to use when dealing with a pseudo of *PMODE.
168 Alter *PMODE if we return a number greater than one. */
171 maybe_split_mode (machine_mode
*pmode
)
173 machine_mode mode
= *pmode
;
175 if (COMPLEX_MODE_P (mode
))
177 *pmode
= GET_MODE_INNER (mode
);
180 else if (mode
== TImode
)
188 /* Like maybe_split_mode, but only return whether or not the mode
189 needs to be split. */
191 nvptx_split_reg_p (machine_mode mode
)
193 if (COMPLEX_MODE_P (mode
))
200 #define PASS_IN_REG_P(MODE, TYPE) \
201 ((GET_MODE_CLASS (MODE) == MODE_INT \
202 || GET_MODE_CLASS (MODE) == MODE_FLOAT \
203 || ((GET_MODE_CLASS (MODE) == MODE_COMPLEX_INT \
204 || GET_MODE_CLASS (MODE) == MODE_COMPLEX_FLOAT) \
205 && !AGGREGATE_TYPE_P (TYPE))) \
208 #define RETURN_IN_REG_P(MODE) \
209 ((GET_MODE_CLASS (MODE) == MODE_INT \
210 || GET_MODE_CLASS (MODE) == MODE_FLOAT) \
211 && GET_MODE_SIZE (MODE) <= 8)
213 /* Perform a mode promotion for a function argument with MODE. Return
214 the promoted mode. */
217 arg_promotion (machine_mode mode
)
219 if (mode
== QImode
|| mode
== HImode
)
224 /* Write the declaration of a function arg of TYPE to S. I is the index
225 of the argument, MODE its mode. NO_ARG_TYPES is true if this is for
226 a decl with zero TYPE_ARG_TYPES, i.e. an old-style C decl. */
229 write_one_arg (std::stringstream
&s
, tree type
, int i
, machine_mode mode
,
232 if (!PASS_IN_REG_P (mode
, type
))
235 int count
= maybe_split_mode (&mode
);
239 write_one_arg (s
, NULL_TREE
, i
, mode
, false);
240 write_one_arg (s
, NULL_TREE
, i
+ 1, mode
, false);
244 if (no_arg_types
&& !AGGREGATE_TYPE_P (type
))
248 mode
= arg_promotion (mode
);
253 s
<< ".param" << nvptx_ptx_type_from_mode (mode
, false) << " %in_ar"
254 << (i
+ 1) << (mode
== QImode
|| mode
== HImode
? "[1]" : "");
256 s
<< "[" << int_size_in_bytes (type
) << "]";
260 /* Look for attributes in ATTRS that would indicate we must write a function
261 as a .entry kernel rather than a .func. Return true if one is found. */
264 write_as_kernel (tree attrs
)
266 return (lookup_attribute ("kernel", attrs
) != NULL_TREE
267 || lookup_attribute ("omp target entrypoint", attrs
) != NULL_TREE
);
270 /* Write a function decl for DECL to S, where NAME is the name to be used. */
273 nvptx_write_function_decl (std::stringstream
&s
, const char *name
, const_tree decl
)
275 tree fntype
= TREE_TYPE (decl
);
276 tree result_type
= TREE_TYPE (fntype
);
277 tree args
= TYPE_ARG_TYPES (fntype
);
278 tree attrs
= DECL_ATTRIBUTES (decl
);
279 bool kernel
= write_as_kernel (attrs
);
280 bool is_main
= strcmp (name
, "main") == 0;
281 bool args_from_decl
= false;
284 NULL in TYPE_ARG_TYPES, for old-style functions
285 NULL in DECL_ARGUMENTS, for builtin functions without another
287 So we have to pick the best one we have. */
290 args
= DECL_ARGUMENTS (decl
);
291 args_from_decl
= true;
294 if (DECL_EXTERNAL (decl
))
296 else if (TREE_PUBLIC (decl
))
304 /* Declare the result. */
305 bool return_in_mem
= false;
306 if (TYPE_MODE (result_type
) != VOIDmode
)
308 machine_mode mode
= TYPE_MODE (result_type
);
309 if (!RETURN_IN_REG_P (mode
))
310 return_in_mem
= true;
313 mode
= arg_promotion (mode
);
314 s
<< "(.param" << nvptx_ptx_type_from_mode (mode
, false)
324 /* Declare argument types. */
325 if ((args
!= NULL_TREE
326 && !(TREE_CODE (args
) == TREE_LIST
&& TREE_VALUE (args
) == void_type_node
))
329 || DECL_STATIC_CHAIN (decl
))
333 bool any_args
= false;
336 s
<< ".param.u" << GET_MODE_BITSIZE (Pmode
) << " %in_ar1";
339 while (args
!= NULL_TREE
)
341 tree type
= args_from_decl
? TREE_TYPE (args
) : TREE_VALUE (args
);
342 machine_mode mode
= TYPE_MODE (type
);
344 if (mode
!= VOIDmode
)
346 i
= write_one_arg (s
, type
, i
, mode
,
347 TYPE_ARG_TYPES (fntype
) == 0);
351 args
= TREE_CHAIN (args
);
353 if (stdarg_p (fntype
))
356 s
<< ", .param.u" << GET_MODE_BITSIZE (Pmode
) << " %in_argp";
358 if (DECL_STATIC_CHAIN (decl
))
362 s
<< ".reg.u" << GET_MODE_BITSIZE (Pmode
)
363 << reg_names
[STATIC_CHAIN_REGNUM
];
365 if (!any_args
&& is_main
)
366 s
<< ".param.u32 %argc, .param.u" << GET_MODE_BITSIZE (Pmode
)
372 /* Walk either ARGTYPES or ARGS if the former is null, and write out part of
373 the function header to FILE. If WRITE_COPY is false, write reg
374 declarations, otherwise write the copy from the incoming argument to that
375 reg. RETURN_IN_MEM indicates whether to start counting arg numbers at 1
379 walk_args_for_param (FILE *file
, tree argtypes
, tree args
, bool write_copy
,
384 bool args_from_decl
= false;
386 args_from_decl
= true;
390 for (i
= return_in_mem
? 1 : 0; args
!= NULL_TREE
; args
= TREE_CHAIN (args
))
392 tree type
= args_from_decl
? TREE_TYPE (args
) : TREE_VALUE (args
);
393 machine_mode mode
= TYPE_MODE (type
);
395 if (mode
== VOIDmode
)
398 if (!PASS_IN_REG_P (mode
, type
))
401 int count
= maybe_split_mode (&mode
);
404 if (argtypes
== NULL
&& !AGGREGATE_TYPE_P (type
))
410 mode
= arg_promotion (mode
);
416 fprintf (file
, "\tld.param%s %%ar%d, [%%in_ar%d];\n",
417 nvptx_ptx_type_from_mode (mode
, false), i
, i
);
419 fprintf (file
, "\t.reg%s %%ar%d;\n",
420 nvptx_ptx_type_from_mode (mode
, false), i
);
425 /* Write a .func or .entry declaration (not a definition) along with
426 a helper comment for use by ld. S is the stream to write to, DECL
427 the decl for the function with name NAME. */
430 write_function_decl_and_comment (std::stringstream
&s
, const char *name
, const_tree decl
)
433 if (TREE_PUBLIC (decl
))
435 s
<< " FUNCTION DECL: ";
441 nvptx_write_function_decl (s
, name
, decl
);
445 /* Check NAME for special function names and redirect them by returning a
446 replacement. This applies to malloc, free and realloc, for which we
447 want to use libgcc wrappers, and to a function named "call", which triggers a bug in ptxas. */
450 nvptx_name_replacement (const char *name
)
452 if (strcmp (name
, "call") == 0)
453 return "__nvptx_call";
454 if (strcmp (name
, "malloc") == 0)
455 return "__nvptx_malloc";
456 if (strcmp (name
, "free") == 0)
457 return "__nvptx_free";
458 if (strcmp (name
, "realloc") == 0)
459 return "__nvptx_realloc";
463 /* If DECL is a FUNCTION_DECL, check the hash table to see if we
464 already encountered it, and if not, insert it and write a ptx
465 declaration that will be output at the end of compilation. */
468 nvptx_record_fndecl (tree decl
, bool force
= false)
470 if (decl
== NULL_TREE
|| TREE_CODE (decl
) != FUNCTION_DECL
471 || !DECL_EXTERNAL (decl
))
474 if (!force
&& TYPE_ARG_TYPES (TREE_TYPE (decl
)) == NULL_TREE
)
477 tree
*slot
= declared_fndecls_htab
->find_slot (decl
, INSERT
);
481 const char *name
= get_fnname_from_decl (decl
);
482 name
= nvptx_name_replacement (name
);
483 write_function_decl_and_comment (func_decls
, name
, decl
);
488 /* Record that we need to emit a ptx decl for DECL. Either do it now, or
489 record it for later in case we have no argument information at this
493 nvptx_record_needed_fndecl (tree decl
)
495 if (nvptx_record_fndecl (decl
))
498 tree
*slot
= needed_fndecls_htab
->find_slot (decl
, INSERT
);
503 /* Implement ASM_DECLARE_FUNCTION_NAME. Writes the start of a ptx
504 function, including local var decls and copies from the arguments to
508 nvptx_declare_function_name (FILE *file
, const char *name
, const_tree decl
)
510 tree fntype
= TREE_TYPE (decl
);
511 tree result_type
= TREE_TYPE (fntype
);
513 name
= nvptx_name_replacement (name
);
516 write_function_decl_and_comment (s
, name
, decl
);
518 if (TREE_PUBLIC (decl
))
520 s
<< " FUNCTION DEF: ";
528 nvptx_write_function_decl (s
, name
, decl
);
529 fprintf (file
, "%s", s
.str().c_str());
531 bool return_in_mem
= false;
532 if (TYPE_MODE (result_type
) != VOIDmode
)
534 machine_mode mode
= TYPE_MODE (result_type
);
535 if (!RETURN_IN_REG_P (mode
))
536 return_in_mem
= true;
539 fprintf (file
, "\n{\n");
541 /* Ensure all arguments that should live in a register have one
542 declared. We'll emit the copies below. */
543 walk_args_for_param (file
, TYPE_ARG_TYPES (fntype
), DECL_ARGUMENTS (decl
),
544 false, return_in_mem
);
546 fprintf (file
, "\t.reg.u%d %%ar1;\n", GET_MODE_BITSIZE (Pmode
));
547 else if (TYPE_MODE (result_type
) != VOIDmode
)
549 machine_mode mode
= arg_promotion (TYPE_MODE (result_type
));
550 fprintf (file
, ".reg%s %%retval;\n",
551 nvptx_ptx_type_from_mode (mode
, false));
554 if (stdarg_p (fntype
))
555 fprintf (file
, "\t.reg.u%d %%argp;\n", GET_MODE_BITSIZE (Pmode
));
557 fprintf (file
, "\t.reg.u%d %s;\n", GET_MODE_BITSIZE (Pmode
),
558 reg_names
[OUTGOING_STATIC_CHAIN_REGNUM
]);
560 /* Declare the pseudos we have as ptx registers. */
561 int maxregs
= max_reg_num ();
562 for (int i
= LAST_VIRTUAL_REGISTER
+ 1; i
< maxregs
; i
++)
564 if (regno_reg_rtx
[i
] != const0_rtx
)
566 machine_mode mode
= PSEUDO_REGNO_MODE (i
);
567 int count
= maybe_split_mode (&mode
);
571 fprintf (file
, "\t.reg%s %%r%d$%d;\n",
572 nvptx_ptx_type_from_mode (mode
, true),
576 fprintf (file
, "\t.reg%s %%r%d;\n",
577 nvptx_ptx_type_from_mode (mode
, true),
582 /* The only reason we might be using outgoing args is if we call a stdargs
583 function. Allocate the space for this. If we called varargs functions
584 without passing any variadic arguments, we'll see a reference to outargs
585 even with a zero outgoing_args_size. */
586 HOST_WIDE_INT sz
= crtl
->outgoing_args_size
;
589 if (cfun
->machine
->has_call_with_varargs
)
590 fprintf (file
, "\t.reg.u%d %%outargs;\n"
591 "\t.local.align 8 .b8 %%outargs_ar[" HOST_WIDE_INT_PRINT_DEC
"];\n",
593 if (cfun
->machine
->punning_buffer_size
> 0)
594 fprintf (file
, "\t.reg.u%d %%punbuffer;\n"
595 "\t.local.align 8 .b8 %%punbuffer_ar[%d];\n",
596 BITS_PER_WORD
, cfun
->machine
->punning_buffer_size
);
598 /* Declare a local variable for the frame. */
599 sz
= get_frame_size ();
600 if (sz
> 0 || cfun
->machine
->has_call_with_sc
)
602 fprintf (file
, "\t.reg.u%d %%frame;\n"
603 "\t.local.align 8 .b8 %%farray[" HOST_WIDE_INT_PRINT_DEC
"];\n",
604 BITS_PER_WORD
, sz
== 0 ? 1 : sz
);
605 fprintf (file
, "\tcvta.local.u%d %%frame, %%farray;\n",
609 if (cfun
->machine
->has_call_with_varargs
)
610 fprintf (file
, "\tcvta.local.u%d %%outargs, %%outargs_ar;\n",
612 if (cfun
->machine
->punning_buffer_size
> 0)
613 fprintf (file
, "\tcvta.local.u%d %%punbuffer, %%punbuffer_ar;\n",
616 /* Now emit any copies necessary for arguments. */
617 walk_args_for_param (file
, TYPE_ARG_TYPES (fntype
), DECL_ARGUMENTS (decl
),
618 true, return_in_mem
);
620 fprintf (file
, "ld.param.u%d %%ar1, [%%in_ar1];\n",
621 GET_MODE_BITSIZE (Pmode
));
622 if (stdarg_p (fntype
))
623 fprintf (file
, "ld.param.u%d %%argp, [%%in_argp];\n",
624 GET_MODE_BITSIZE (Pmode
));
627 /* Output a return instruction. Also copy the return value to its outgoing
631 nvptx_output_return (void)
633 tree fntype
= TREE_TYPE (current_function_decl
);
634 tree result_type
= TREE_TYPE (fntype
);
635 if (TYPE_MODE (result_type
) != VOIDmode
)
637 machine_mode mode
= TYPE_MODE (result_type
);
638 if (RETURN_IN_REG_P (mode
))
640 mode
= arg_promotion (mode
);
641 fprintf (asm_out_file
, "\tst.param%s\t[%%out_retval], %%retval;\n",
642 nvptx_ptx_type_from_mode (mode
, false));
649 /* Construct a function declaration from a call insn. This can be
650 necessary for two reasons - either we have an indirect call which
651 requires a .callprototype declaration, or we have a libcall
652 generated by emit_library_call for which no decl exists. */
655 write_func_decl_from_insn (std::stringstream
&s
, rtx result
, rtx pat
,
658 bool callprototype
= register_operand (callee
, Pmode
);
659 const char *name
= "_";
662 name
= XSTR (callee
, 0);
663 name
= nvptx_name_replacement (name
);
664 s
<< "// BEGIN GLOBAL FUNCTION DECL: " << name
<< "\n";
666 s
<< (callprototype
? "\t.callprototype\t" : "\t.extern .func ");
668 if (result
!= NULL_RTX
)
671 s
<< nvptx_ptx_type_from_mode (arg_promotion (GET_MODE (result
)),
683 int nargs
= XVECLEN (pat
, 0) - 1;
687 for (int i
= 0; i
< nargs
; i
++)
689 rtx t
= XEXP (XVECEXP (pat
, 0, i
+ 1), 0);
690 machine_mode mode
= GET_MODE (t
);
691 int count
= maybe_split_mode (&mode
);
696 s
<< nvptx_ptx_type_from_mode (mode
, false);
702 if (mode
== QImode
|| mode
== HImode
)
704 if (i
+ 1 < nargs
|| count
> 0)
713 /* Terminate a function by writing a closing brace to FILE. */
716 nvptx_function_end (FILE *file
)
718 fprintf (file
, "\t}\n");
721 /* Decide whether we can make a sibling call to a function. For ptx, we
725 nvptx_function_ok_for_sibcall (tree
, tree
)
730 /* Implement the TARGET_CALL_ARGS hook. Record information about one
731 argument to the next call. */
734 nvptx_call_args (rtx arg
, tree funtype
)
736 if (cfun
->machine
->start_call
== NULL_RTX
)
738 cfun
->machine
->call_args
= NULL
;
739 cfun
->machine
->funtype
= funtype
;
740 cfun
->machine
->start_call
= const0_rtx
;
745 rtx_expr_list
*args_so_far
= cfun
->machine
->call_args
;
747 cfun
->machine
->call_args
= alloc_EXPR_LIST (VOIDmode
, arg
, args_so_far
);
750 /* Implement the corresponding END_CALL_ARGS hook. Clear and free the
751 information we recorded. */
754 nvptx_end_call_args (void)
756 cfun
->machine
->start_call
= NULL_RTX
;
757 free_EXPR_LIST_list (&cfun
->machine
->call_args
);
760 /* Emit the sequence for a call. */
763 nvptx_expand_call (rtx retval
, rtx address
)
766 rtx callee
= XEXP (address
, 0);
769 bool external_decl
= false;
772 for (t
= cfun
->machine
->call_args
; t
; t
= XEXP (t
, 1))
775 bool has_varargs
= false;
776 tree decl_type
= NULL_TREE
;
778 if (!call_insn_operand (callee
, Pmode
))
780 callee
= force_reg (Pmode
, callee
);
781 address
= change_address (address
, QImode
, callee
);
784 if (GET_CODE (callee
) == SYMBOL_REF
)
786 tree decl
= SYMBOL_REF_DECL (callee
);
787 if (decl
!= NULL_TREE
)
789 decl_type
= TREE_TYPE (decl
);
790 if (DECL_STATIC_CHAIN (decl
))
791 cfun
->machine
->has_call_with_sc
= true;
792 if (DECL_EXTERNAL (decl
))
793 external_decl
= true;
796 if (cfun
->machine
->funtype
797 /* It's possible to construct testcases where we call a variable.
798 See compile/20020129-1.c. stdarg_p will crash so avoid calling it
800 && (TREE_CODE (cfun
->machine
->funtype
) == FUNCTION_TYPE
801 || TREE_CODE (cfun
->machine
->funtype
) == METHOD_TYPE
)
802 && stdarg_p (cfun
->machine
->funtype
))
805 cfun
->machine
->has_call_with_varargs
= true;
807 vec
= rtvec_alloc (nargs
+ 1 + (has_varargs
? 1 : 0));
808 pat
= gen_rtx_PARALLEL (VOIDmode
, vec
);
811 rtx this_arg
= gen_reg_rtx (Pmode
);
813 emit_move_insn (this_arg
, stack_pointer_rtx
);
815 emit_move_insn (this_arg
, stack_pointer_rtx
);
816 XVECEXP (pat
, 0, nargs
+ 1) = gen_rtx_USE (VOIDmode
, this_arg
);
821 for (i
= 1, arg
= cfun
->machine
->call_args
; arg
; arg
= XEXP (arg
, 1), i
++)
823 rtx this_arg
= XEXP (arg
, 0);
824 XVECEXP (pat
, 0, i
) = gen_rtx_USE (VOIDmode
, this_arg
);
827 rtx tmp_retval
= retval
;
828 t
= gen_rtx_CALL (VOIDmode
, address
, const0_rtx
);
829 if (retval
!= NULL_RTX
)
831 if (!nvptx_register_operand (retval
, GET_MODE (retval
)))
832 tmp_retval
= gen_reg_rtx (GET_MODE (retval
));
833 t
= gen_rtx_SET (tmp_retval
, t
);
835 XVECEXP (pat
, 0, 0) = t
;
837 && (decl_type
== NULL_TREE
838 || (external_decl
&& TYPE_ARG_TYPES (decl_type
) == NULL_TREE
)))
840 rtx
*slot
= declared_libfuncs_htab
->find_slot (callee
, INSERT
);
844 write_func_decl_from_insn (func_decls
, retval
, pat
, callee
);
847 emit_call_insn (pat
);
848 if (tmp_retval
!= retval
)
849 emit_move_insn (retval
, tmp_retval
);
852 /* Implement TARGET_FUNCTION_ARG. */
855 nvptx_function_arg (cumulative_args_t
, machine_mode mode
,
856 const_tree
, bool named
)
858 if (mode
== VOIDmode
)
862 return gen_reg_rtx (mode
);
866 /* Implement TARGET_FUNCTION_INCOMING_ARG. */
869 nvptx_function_incoming_arg (cumulative_args_t cum_v
, machine_mode mode
,
870 const_tree
, bool named
)
872 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
873 if (mode
== VOIDmode
)
879 /* No need to deal with split modes here, the only case that can
880 happen is complex modes and those are dealt with by
881 TARGET_SPLIT_COMPLEX_ARG. */
882 return gen_rtx_UNSPEC (mode
,
883 gen_rtvec (1, GEN_INT (1 + cum
->count
)),
887 /* Implement TARGET_FUNCTION_ARG_ADVANCE. */
890 nvptx_function_arg_advance (cumulative_args_t cum_v
, machine_mode mode
,
891 const_tree type ATTRIBUTE_UNUSED
,
892 bool named ATTRIBUTE_UNUSED
)
894 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
901 /* Handle the TARGET_STRICT_ARGUMENT_NAMING target hook.
903 For nvptx, we know how to handle functions declared as stdarg: by
904 passing an extra pointer to the unnamed arguments. However, the
905 Fortran frontend can produce a different situation, where a
906 function pointer is declared with no arguments, but the actual
907 function and calls to it take more arguments. In that case, we
908 want to ensure the call matches the definition of the function. */
911 nvptx_strict_argument_naming (cumulative_args_t cum_v
)
913 CUMULATIVE_ARGS
*cum
= get_cumulative_args (cum_v
);
914 return cum
->fntype
== NULL_TREE
|| stdarg_p (cum
->fntype
);
917 /* Implement TARGET_FUNCTION_ARG_BOUNDARY. */
920 nvptx_function_arg_boundary (machine_mode mode
, const_tree type
)
922 unsigned int boundary
= type
? TYPE_ALIGN (type
) : GET_MODE_BITSIZE (mode
);
924 if (boundary
> BITS_PER_WORD
)
925 return 2 * BITS_PER_WORD
;
929 HOST_WIDE_INT size
= int_size_in_bytes (type
);
931 return 2 * BITS_PER_WORD
;
932 if (boundary
< BITS_PER_WORD
)
935 return BITS_PER_WORD
;
937 return 2 * BITS_PER_UNIT
;
943 /* TARGET_FUNCTION_VALUE implementation. Returns an RTX representing the place
944 where function FUNC returns or receives a value of data type TYPE. */
947 nvptx_function_value (const_tree type
, const_tree func ATTRIBUTE_UNUSED
,
950 int unsignedp
= TYPE_UNSIGNED (type
);
951 machine_mode orig_mode
= TYPE_MODE (type
);
952 machine_mode mode
= promote_function_mode (type
, orig_mode
,
953 &unsignedp
, NULL_TREE
, 1);
955 return gen_rtx_REG (mode
, NVPTX_RETURN_REGNUM
);
956 if (cfun
->machine
->start_call
== NULL_RTX
)
957 /* Pretend to return in a hard reg for early uses before pseudos can be
959 return gen_rtx_REG (mode
, NVPTX_RETURN_REGNUM
);
960 return gen_reg_rtx (mode
);
963 /* Implement TARGET_LIBCALL_VALUE. */
966 nvptx_libcall_value (machine_mode mode
, const_rtx
)
968 if (cfun
->machine
->start_call
== NULL_RTX
)
969 /* Pretend to return in a hard reg for early uses before pseudos can be
971 return gen_rtx_REG (mode
, NVPTX_RETURN_REGNUM
);
972 return gen_reg_rtx (mode
);
975 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
978 nvptx_function_value_regno_p (const unsigned int regno
)
980 return regno
== NVPTX_RETURN_REGNUM
;
983 /* Types with a mode other than those supported by the machine are passed by
984 reference in memory. */
987 nvptx_pass_by_reference (cumulative_args_t
, machine_mode mode
,
988 const_tree type
, bool)
990 return !PASS_IN_REG_P (mode
, type
);
993 /* Implement TARGET_RETURN_IN_MEMORY. */
996 nvptx_return_in_memory (const_tree type
, const_tree
)
998 machine_mode mode
= TYPE_MODE (type
);
999 if (!RETURN_IN_REG_P (mode
))
1004 /* Implement TARGET_PROMOTE_FUNCTION_MODE. */
1007 nvptx_promote_function_mode (const_tree type
, machine_mode mode
,
1009 const_tree funtype
, int for_return
)
1011 if (type
== NULL_TREE
)
1014 return promote_mode (type
, mode
, punsignedp
);
1015 /* For K&R-style functions, try to match the language promotion rules to
1016 minimize type mismatches at assembly time. */
1017 if (TYPE_ARG_TYPES (funtype
) == NULL_TREE
1018 && type
!= NULL_TREE
1019 && !AGGREGATE_TYPE_P (type
))
1023 mode
= arg_promotion (mode
);
1029 /* Implement TARGET_STATIC_CHAIN. */
1032 nvptx_static_chain (const_tree fndecl
, bool incoming_p
)
1034 if (!DECL_STATIC_CHAIN (fndecl
))
1038 return gen_rtx_REG (Pmode
, STATIC_CHAIN_REGNUM
);
1040 return gen_rtx_REG (Pmode
, OUTGOING_STATIC_CHAIN_REGNUM
);
1043 /* Emit a comparison COMPARE, and return the new test to be used in the
1047 nvptx_expand_compare (rtx compare
)
1049 rtx pred
= gen_reg_rtx (BImode
);
1050 rtx cmp
= gen_rtx_fmt_ee (GET_CODE (compare
), BImode
,
1051 XEXP (compare
, 0), XEXP (compare
, 1));
1052 emit_insn (gen_rtx_SET (pred
, cmp
));
1053 return gen_rtx_NE (BImode
, pred
, const0_rtx
);
1056 /* When loading an operand ORIG_OP, verify whether an address space
1057 conversion to generic is required, and if so, perform it. Also
1058 check for SYMBOL_REFs for function decls and call
1059 nvptx_record_needed_fndecl as needed.
1060 Return either the original operand, or the converted one. */
1063 nvptx_maybe_convert_symbolic_operand (rtx orig_op
)
1065 if (GET_MODE (orig_op
) != Pmode
)
1069 while (GET_CODE (op
) == PLUS
|| GET_CODE (op
) == CONST
)
1071 if (GET_CODE (op
) != SYMBOL_REF
)
1074 tree decl
= SYMBOL_REF_DECL (op
);
1075 if (decl
&& TREE_CODE (decl
) == FUNCTION_DECL
)
1077 nvptx_record_needed_fndecl (decl
);
1081 addr_space_t as
= nvptx_addr_space_from_address (op
);
1082 if (as
== ADDR_SPACE_GENERIC
)
1086 code
= (as
== ADDR_SPACE_GLOBAL
? UNSPEC_FROM_GLOBAL
1087 : as
== ADDR_SPACE_LOCAL
? UNSPEC_FROM_LOCAL
1088 : as
== ADDR_SPACE_SHARED
? UNSPEC_FROM_SHARED
1089 : as
== ADDR_SPACE_CONST
? UNSPEC_FROM_CONST
1090 : UNSPEC_FROM_PARAM
);
1091 rtx dest
= gen_reg_rtx (Pmode
);
1092 emit_insn (gen_rtx_SET (dest
, gen_rtx_UNSPEC (Pmode
, gen_rtvec (1, orig_op
),
1097 /* Returns true if X is a valid address for use in a memory reference. */
1100 nvptx_legitimate_address_p (machine_mode
, rtx x
, bool)
1102 enum rtx_code code
= GET_CODE (x
);
1110 if (REG_P (XEXP (x
, 0)) && CONST_INT_P (XEXP (x
, 1)))
1124 /* Implement HARD_REGNO_MODE_OK. We barely use hard regs, but we want
1125 to ensure that the return register's mode isn't changed. */
1128 nvptx_hard_regno_mode_ok (int regno
, machine_mode mode
)
1130 if (regno
!= NVPTX_RETURN_REGNUM
1131 || cfun
== NULL
|| cfun
->machine
->ret_reg_mode
== VOIDmode
)
1133 return mode
== cfun
->machine
->ret_reg_mode
;
1136 /* Convert an address space AS to the corresponding ptx string. */
1139 nvptx_section_from_addr_space (addr_space_t as
)
1143 case ADDR_SPACE_CONST
:
1146 case ADDR_SPACE_GLOBAL
:
1149 case ADDR_SPACE_SHARED
:
1152 case ADDR_SPACE_GENERIC
:
1160 /* Determine whether DECL goes into .const or .global. */
1163 nvptx_section_for_decl (const_tree decl
)
1165 bool is_const
= (CONSTANT_CLASS_P (decl
)
1166 || TREE_CODE (decl
) == CONST_DECL
1167 || TREE_READONLY (decl
));
1174 /* Look for a SYMBOL_REF in ADDR and return the address space to be used
1175 for the insn referencing this address. */
1178 nvptx_addr_space_from_address (rtx addr
)
1180 while (GET_CODE (addr
) == PLUS
|| GET_CODE (addr
) == CONST
)
1181 addr
= XEXP (addr
, 0);
1182 if (GET_CODE (addr
) != SYMBOL_REF
)
1183 return ADDR_SPACE_GENERIC
;
1185 tree decl
= SYMBOL_REF_DECL (addr
);
1186 if (decl
== NULL_TREE
|| TREE_CODE (decl
) == FUNCTION_DECL
)
1187 return ADDR_SPACE_GENERIC
;
1189 bool is_const
= (CONSTANT_CLASS_P (decl
)
1190 || TREE_CODE (decl
) == CONST_DECL
1191 || TREE_READONLY (decl
));
1193 return ADDR_SPACE_CONST
;
1195 return ADDR_SPACE_GLOBAL
;
1198 /* Machinery to output constant initializers. */
1200 /* Used when assembling integers to ensure data is emitted in
1201 pieces whose size matches the declaration we printed. */
1202 static unsigned int decl_chunk_size
;
1203 static machine_mode decl_chunk_mode
;
1204 /* Used in the same situation, to keep track of the byte offset
1205 into the initializer. */
1206 static unsigned HOST_WIDE_INT decl_offset
;
1207 /* The initializer part we are currently processing. */
1208 static HOST_WIDE_INT init_part
;
1209 /* The total size of the object. */
1210 static unsigned HOST_WIDE_INT object_size
;
1211 /* True if we found a skip extending to the end of the object. Used to
1212 assert that no data follows. */
1213 static bool object_finished
;
1215 /* Write the necessary separator string to begin a new initializer value. */
1218 begin_decl_field (void)
1220 /* We never see decl_offset at zero by the time we get here. */
1221 if (decl_offset
== decl_chunk_size
)
1222 fprintf (asm_out_file
, " = { ");
1224 fprintf (asm_out_file
, ", ");
1227 /* Output the currently stored chunk as an initializer value. */
1230 output_decl_chunk (void)
1232 begin_decl_field ();
1233 output_address (gen_int_mode (init_part
, decl_chunk_mode
));
1237 /* Add value VAL sized SIZE to the data we're emitting, and keep writing
1238 out chunks as they fill up. */
1241 nvptx_assemble_value (HOST_WIDE_INT val
, unsigned int size
)
1243 unsigned HOST_WIDE_INT chunk_offset
= decl_offset
% decl_chunk_size
;
1244 gcc_assert (!object_finished
);
1247 int this_part
= size
;
1248 if (chunk_offset
+ this_part
> decl_chunk_size
)
1249 this_part
= decl_chunk_size
- chunk_offset
;
1250 HOST_WIDE_INT val_part
;
1251 HOST_WIDE_INT mask
= 2;
1252 mask
<<= this_part
* BITS_PER_UNIT
- 1;
1253 val_part
= val
& (mask
- 1);
1254 init_part
|= val_part
<< (BITS_PER_UNIT
* chunk_offset
);
1255 val
>>= BITS_PER_UNIT
* this_part
;
1257 decl_offset
+= this_part
;
1258 if (decl_offset
% decl_chunk_size
== 0)
1259 output_decl_chunk ();
1265 /* Target hook for assembling integer object X of size SIZE. */
1268 nvptx_assemble_integer (rtx x
, unsigned int size
, int ARG_UNUSED (aligned_p
))
1270 if (GET_CODE (x
) == SYMBOL_REF
|| GET_CODE (x
) == CONST
)
1272 gcc_assert (size
= decl_chunk_size
);
1273 if (decl_offset
% decl_chunk_size
!= 0)
1274 sorry ("cannot emit unaligned pointers in ptx assembly");
1275 decl_offset
+= size
;
1276 begin_decl_field ();
1278 HOST_WIDE_INT off
= 0;
1279 if (GET_CODE (x
) == CONST
)
1281 if (GET_CODE (x
) == PLUS
)
1283 off
= INTVAL (XEXP (x
, 1));
1286 if (GET_CODE (x
) == SYMBOL_REF
)
1288 nvptx_record_needed_fndecl (SYMBOL_REF_DECL (x
));
1289 fprintf (asm_out_file
, "generic(");
1291 fprintf (asm_out_file
, ")");
1294 fprintf (asm_out_file
, " + " HOST_WIDE_INT_PRINT_DEC
, off
);
1299 switch (GET_CODE (x
))
1311 nvptx_assemble_value (val
, size
);
1315 /* Output SIZE zero bytes. We ignore the FILE argument since the
1316 functions we're calling to perform the output just use
1320 nvptx_output_skip (FILE *, unsigned HOST_WIDE_INT size
)
1322 if (decl_offset
+ size
>= object_size
)
1324 if (decl_offset
% decl_chunk_size
!= 0)
1325 nvptx_assemble_value (0, decl_chunk_size
);
1326 object_finished
= true;
1330 while (size
> decl_chunk_size
)
1332 nvptx_assemble_value (0, decl_chunk_size
);
1333 size
-= decl_chunk_size
;
1336 nvptx_assemble_value (0, 1);
1339 /* Output a string STR with length SIZE. As in nvptx_output_skip we
1340 ignore the FILE arg. */
1343 nvptx_output_ascii (FILE *, const char *str
, unsigned HOST_WIDE_INT size
)
1345 for (unsigned HOST_WIDE_INT i
= 0; i
< size
; i
++)
1346 nvptx_assemble_value (str
[i
], 1);
1349 /* Called when the initializer for a decl has been completely output through
1350 combinations of the three functions above. */
1353 nvptx_assemble_decl_end (void)
1355 if (decl_offset
!= 0)
1357 if (!object_finished
&& decl_offset
% decl_chunk_size
!= 0)
1358 nvptx_assemble_value (0, decl_chunk_size
);
1360 fprintf (asm_out_file
, " }");
1362 fprintf (asm_out_file
, ";\n");
1365 /* Start a declaration of a variable of TYPE with NAME to
1366 FILE. IS_PUBLIC says whether this will be externally visible.
1367 Here we just write the linker hint and decide on the chunk size
1371 init_output_initializer (FILE *file
, const char *name
, const_tree type
,
1374 fprintf (file
, "// BEGIN%s VAR DEF: ", is_public
? " GLOBAL" : "");
1375 assemble_name_raw (file
, name
);
1378 if (TREE_CODE (type
) == ARRAY_TYPE
)
1379 type
= TREE_TYPE (type
);
1380 int sz
= int_size_in_bytes (type
);
1381 if ((TREE_CODE (type
) != INTEGER_TYPE
1382 && TREE_CODE (type
) != ENUMERAL_TYPE
1383 && TREE_CODE (type
) != REAL_TYPE
)
1385 || sz
> HOST_BITS_PER_WIDE_INT
)
1386 type
= ptr_type_node
;
1387 decl_chunk_size
= int_size_in_bytes (type
);
1388 decl_chunk_mode
= int_mode_for_mode (TYPE_MODE (type
));
1391 object_finished
= false;
1394 /* Implement TARGET_ASM_DECLARE_CONSTANT_NAME. Begin the process of
1395 writing a constant variable EXP with NAME and SIZE and its
1396 initializer to FILE. */
1399 nvptx_asm_declare_constant_name (FILE *file
, const char *name
,
1400 const_tree exp
, HOST_WIDE_INT size
)
1402 tree type
= TREE_TYPE (exp
);
1403 init_output_initializer (file
, name
, type
, false);
1404 fprintf (file
, "\t.const .align %d .u%d ",
1405 TYPE_ALIGN (TREE_TYPE (exp
)) / BITS_PER_UNIT
,
1406 decl_chunk_size
* BITS_PER_UNIT
);
1407 assemble_name (file
, name
);
1408 fprintf (file
, "[" HOST_WIDE_INT_PRINT_DEC
"]",
1409 (size
+ decl_chunk_size
- 1) / decl_chunk_size
);
1413 /* Implement the ASM_DECLARE_OBJECT_NAME macro. Used to start writing
1414 a variable DECL with NAME to FILE. */
/* Writes the marker line through init_output_initializer, then the
   declaration header: optional .visible, the section string, alignment,
   element type and the array bound.
   NOTE(review): some guards and the brace structure are not visible in
   this listing; in particular confirm under which condition
   object_finished is set.  */
1417 nvptx_declare_object_name (FILE *file
, const char *name
, const_tree decl
)
/* Nothing is emitted here for a null DECL or one without DECL_SIZE.  */
1419 if (decl
&& DECL_SIZE (decl
))
1421 tree type
= TREE_TYPE (decl
);
1422 unsigned HOST_WIDE_INT size
;
1424 init_output_initializer (file
, name
, type
, TREE_PUBLIC (decl
));
/* Object size in bytes, taken from DECL_SIZE_UNIT.  */
1425 size
= tree_to_uhwi (DECL_SIZE_UNIT (decl
));
1426 const char *section
= nvptx_section_for_decl (decl
);
/* Public decls get the .visible directive; alignment is printed in bytes
   and the element width in bits.  */
1427 fprintf (file
, "\t%s%s .align %d .u%d ",
1428 TREE_PUBLIC (decl
) ? " .visible" : "", section
,
1429 DECL_ALIGN (decl
) / BITS_PER_UNIT
,
1430 decl_chunk_size
* BITS_PER_UNIT
);
1431 assemble_name (file
, name
);
/* Element count: size divided by the chunk size, rounded up.  */
1433 fprintf (file
, "[" HOST_WIDE_INT_PRINT_DEC
"]",
1434 (size
+ decl_chunk_size
- 1) / decl_chunk_size
);
1436 object_finished
= true;
1441 /* Implement TARGET_ASM_GLOBALIZE_LABEL by doing nothing. */
/* Both parameters are unnamed, so neither the stream nor the label name
   is used.  */
1444 nvptx_globalize_label (FILE *, const char *)
1448 /* Implement TARGET_ASM_ASSEMBLE_UNDEFINED_DECL. Write an extern
1449 declaration only for variable DECL with NAME to FILE. */
/* The declaration is written as a .extern byte array (.b8) in the section
   reported by nvptx_section_for_decl, preceded by a BEGIN VAR DECL marker
   line.  */
1451 nvptx_assemble_undefined_decl (FILE *file
, const char *name
, const_tree decl
)
/* Test for anything that is not a variable.  NOTE(review): the statement
   controlled by this test is not visible in this listing; per the header
   comment it is presumably an early return.  */
1453 if (TREE_CODE (decl
) != VAR_DECL
)
1455 const char *section
= nvptx_section_for_decl (decl
);
/* Marker comment in the output; GLOBAL is added for public decls.  */
1456 fprintf (file
, "// BEGIN%s VAR DECL: ", TREE_PUBLIC (decl
) ? " GLOBAL" : "");
1457 assemble_name_raw (file
, name
);
/* Size of the type of DECL in bytes.  */
1459 HOST_WIDE_INT size
= int_size_in_bytes (TREE_TYPE (decl
));
1460 fprintf (file
, ".extern %s .b8 ", section
);
1461 assemble_name_raw (file
, name
);
/* Array bound in bytes.  NOTE(review): a guard on size may precede this
   statement in the full file; it is not visible here.  */
1463 fprintf (file
, "[" HOST_WIDE_INT_PRINT_DEC
"]", size
);
1464 fprintf (file
, ";\n\n");
1467 /* Output INSN, which is a call to CALLEE with result RESULT. For ptx, this
1468 involves writing .param declarations and in/out copies into them. */
/* Everything is written to asm_out_file inside one brace scope: the
   return-value param, an optional prototype label, one param per argument
   piece, the stores into those params, and the call itself.  The returned
   template loads the result from the return param and closes the scope.
   NOTE(review): several guards, local declarations (buf, labelno, i,
   argno) and inner loops are not visible in this listing; comments below
   describe only the statements that are shown.  */
1471 nvptx_output_call_insn (rtx_insn
*insn
, rtx result
, rtx callee
)
/* True when CALLEE satisfies register_operand in Pmode, i.e. the call
   target is held in a register.  */
1475 bool needs_tgt
= register_operand (callee
, Pmode
);
1476 rtx pat
= PATTERN (insn
);
/* Element 0 of the pattern vector is not an argument; the loops below
   index the arguments as i + 1.  */
1477 int nargs
= XVECLEN (pat
, 0) - 1;
1478 tree decl
= NULL_TREE
;
/* Open the brace scope that holds the param declarations.  */
1480 fprintf (asm_out_file
, "\t{\n");
/* Declare the param that receives the return value, typed from the
   promoted mode of RESULT.  */
1483 fprintf (asm_out_file
, "\t\t.param%s %%retval_in;\n",
1484 nvptx_ptx_type_from_mode (arg_promotion (GET_MODE (result
)),
/* For a symbolic callee, fetch its decl and record external functions so
   that a declaration can be emitted for them.  */
1488 if (GET_CODE (callee
) == SYMBOL_REF
)
1490 decl
= SYMBOL_REF_DECL (callee
);
1491 if (decl
&& DECL_EXTERNAL (decl
))
1492 nvptx_record_fndecl (decl
);
/* Emit an internal LCT label followed by the text produced by
   write_func_decl_from_insn; the label is referenced again after the
   argument list.  Presumably only done when needs_tgt is set.  */
1497 ASM_GENERATE_INTERNAL_LABEL (buf
, "LCT", labelno
);
1499 ASM_OUTPUT_LABEL (asm_out_file
, buf
);
1500 std::stringstream s
;
1501 write_func_decl_from_insn (s
, result
, pat
, callee
);
1502 fputs (s
.str().c_str(), asm_out_file
);
/* First pass over the arguments: declare the outgoing params.  */
1505 for (int i
= 0, argno
= 0; i
< nargs
; i
++)
1507 rtx t
= XEXP (XVECEXP (pat
, 0, i
+ 1), 0);
1508 machine_mode mode
= GET_MODE (t
);
/* maybe_split_mode may change mode and returns a piece count.  */
1509 int count
= maybe_split_mode (&mode
);
/* QImode and HImode params are declared with a one-element array
   suffix.  */
1512 fprintf (asm_out_file
, "\t\t.param%s %%out_arg%d%s;\n",
1513 nvptx_ptx_type_from_mode (mode
, false), argno
++,
1514 mode
== QImode
|| mode
== HImode
? "[1]" : "");
/* Second pass: store each argument register into its param.  */
1516 for (int i
= 0, argno
= 0; i
< nargs
; i
++)
1518 rtx t
= XEXP (XVECEXP (pat
, 0, i
+ 1), 0);
/* Arguments are required to be registers at this point.  */
1519 gcc_assert (REG_P (t
));
1520 machine_mode mode
= GET_MODE (t
);
1521 int count
= maybe_split_mode (&mode
);
/* Store form for a whole register.  */
1524 fprintf (asm_out_file
, "\t\tst.param%s [%%out_arg%d], %%r%d;\n",
1525 nvptx_ptx_type_from_mode (mode
, false), argno
++,
/* Store form that names one piece of a register with a dollar suffix;
   presumably used for split modes.  */
1531 fprintf (asm_out_file
, "\t\tst.param%s [%%out_arg%d], %%r%d$%d;\n",
1532 nvptx_ptx_type_from_mode (mode
, false), argno
++,
/* The call instruction itself, with the return param first when there is
   a result.  */
1537 fprintf (asm_out_file
, "\t\tcall ");
1538 if (result
!= NULL_RTX
)
1539 fprintf (asm_out_file
, "(%%retval_in), ");
/* Callee by name, after nvptx_name_replacement has been applied.  */
1543 const char *name
= get_fnname_from_decl (decl
);
1544 name
= nvptx_name_replacement (name
);
1545 assemble_name (asm_out_file
, name
);
/* Callee printed as an address; presumably the alternative to the named
   form above.  */
1548 output_address (callee
);
/* Parenthesized argument list, present when there are arguments or a
   static chain.  */
1550 if (nargs
> 0 || (decl
&& DECL_STATIC_CHAIN (decl
)))
1552 fprintf (asm_out_file
, ", (");
1554 for (i
= 0, argno
= 0; i
< nargs
; i
++)
1556 rtx t
= XEXP (XVECEXP (pat
, 0, i
+ 1), 0);
1557 machine_mode mode
= GET_MODE (t
);
1558 int count
= maybe_split_mode (&mode
);
1562 fprintf (asm_out_file
, "%%out_arg%d", argno
++);
/* Separator unless this is the last piece of the last argument.  */
1563 if (i
+ 1 < nargs
|| count
> 0)
1564 fprintf (asm_out_file
, ", ");
/* The static chain register is appended after the ordinary arguments.  */
1567 if (decl
&& DECL_STATIC_CHAIN (decl
))
1570 fprintf (asm_out_file
, ", ");
1571 fprintf (asm_out_file
, "%s",
1572 reg_names
[OUTGOING_STATIC_CHAIN_REGNUM
]);
1575 fprintf (asm_out_file
, ")");
/* Reference to the label held in buf.  */
1579 fprintf (asm_out_file
, ", ");
1580 assemble_name (asm_out_file
, buf
);
1582 fprintf (asm_out_file
, ";\n");
/* With a result, the returned template loads it from the return param
   and closes the brace scope.  */
1583 if (result
!= NULL_RTX
)
1584 return "ld.param%t0\t%0, [%%retval_in];\n\t}";
1589 /* Implement TARGET_PRINT_OPERAND_PUNCT_VALID_P. */
/* Only the dot and hash characters are accepted as punctuation codes;
   both are listed in the operand-code legend for nvptx_print_operand.  */
1592 nvptx_print_operand_punct_valid_p (unsigned char c
)
1594 return c
== '.' || c
== '#';
/* Forward declaration: nvptx_print_address_operand below calls
   nvptx_print_operand, which is defined later in the file.  */
1597 static void nvptx_print_operand (FILE *, rtx
, int);
1599 /* Subroutine of nvptx_print_operand; used to print a memory reference X to FILE. */
/* The third parameter is an unnamed machine_mode and is not used.
   NOTE(review): the case labels of the switch and the definition of off
   are not visible in this listing.  */
1602 nvptx_print_address_operand (FILE *file
, rtx x
, machine_mode
)
/* Test for a CONST wrapper; the statement it controls is not shown.  */
1605 if (GET_CODE (x
) == CONST
)
1607 switch (GET_CODE (x
))
/* Sum form: first operand, an explicit plus sign, then the offset.  */
1611 output_address (XEXP (x
, 0));
1612 fprintf (file
, "+");
1613 output_address (off
);
/* Form handed directly to output_addr_const.  */
1618 output_addr_const (file
, x
);
/* Remaining form: must not be a MEM, and is printed as a plain operand
   with no modifier code.  */
1622 gcc_assert (GET_CODE (x
) != MEM
);
1623 nvptx_print_operand (file
, x
, 0);
1628 /* Write assembly language output for the address ADDR to FILE. */
/* Thin wrapper: forwards to nvptx_print_address_operand with VOIDmode as
   the mode argument.  */
1631 nvptx_print_operand_address (FILE *file
, rtx addr
)
1633 nvptx_print_address_operand (file
, addr
, VOIDmode
);
1636 /* Print an operand, X, to FILE, with an optional modifier in CODE.
1639 . -- print the predicate for the instruction or an empty string for an unconditional one
1641 # -- print a rounding mode for the instruction
1643 A -- print an address space identifier for a MEM
1644 c -- print an opcode suffix for a comparison operator, including a type code
1645 d -- print a CONST_INT as a vector dimension (x, y, or z)
1646 f -- print a full reg even for something that must always be split
1647 t -- print a type opcode suffix, promoting QImode to 32 bits
1648 T -- print a type size in bits
1649 u -- print a type opcode suffix without promotions. */
/* Prints operand X to FILE according to the modifier CODE.  The visible
   statements handle the two punctuation codes, the letter codes from the
   legend above, and then a dispatch on the rtx code of X (registers,
   memory references, constants, and floating constants printed as
   hexadecimal 0f or 0d literals).
   NOTE(review): the case labels and most braces are not visible in this
   listing, so the grouping comments below are inferred from the legend
   and from statement order.  */
1652 nvptx_print_operand (FILE *file
, rtx x
, int code
)
1655 machine_mode op_mode
;
/* Predicate printing: X is replaced by current_insn_predicate and the
   predicate register is written.  */
1659 x
= current_insn_predicate
;
1662 unsigned int regno
= REGNO (XEXP (x
, 0));
1664 if (GET_CODE (x
) == EQ
)
1666 fputs (reg_names
[regno
], file
);
/* The hash code prints the fixed rounding-mode suffix.  */
1671 else if (code
== '#')
1673 fputs (".rn", file
);
1677 enum rtx_code x_code
= GET_CODE (x
);
/* Address space of the address inside X, printed as a section name.  */
1683 addr_space_t as
= nvptx_addr_space_from_address (XEXP (x
, 0));
1684 fputs (nvptx_section_from_addr_space (as
), file
);
/* Vector dimension: X must be a CONST_INT; values 0, 1 and 2 are
   distinguished.  */
1689 gcc_assert (x_code
== CONST_INT
);
1690 if (INTVAL (x
) == 0)
1692 else if (INTVAL (x
) == 1)
1694 else if (INTVAL (x
) == 2)
/* Type suffix with promotion requested (second argument true).  */
1701 op_mode
= nvptx_underlying_object_mode (x
);
1702 fprintf (file
, "%s", nvptx_ptx_type_from_mode (op_mode
, true));
/* Type suffix without promotion (second argument false).  */
1706 op_mode
= nvptx_underlying_object_mode (x
);
1707 fprintf (file
, "%s", nvptx_ptx_type_from_mode (op_mode
, false));
/* Size of the mode of X in bits.  */
1711 fprintf (file
, "%d", GET_MODE_BITSIZE (GET_MODE (x
)));
/* Predicate prefixes, plain and negated.  */
1715 fprintf (file
, "@");
1719 fprintf (file
, "@!");
/* Comparison suffix: the operator name is chosen from the rtx code, with
   the mode of the first compared operand deciding the type part.  */
1723 op_mode
= GET_MODE (XEXP (x
, 0));
1727 fputs (".eq", file
);
/* Two spellings of not-equal are available; the unordered one is used
   for floating-point modes.  */
1730 if (FLOAT_MODE_P (op_mode
))
1731 fputs (".neu", file
);
1733 fputs (".ne", file
);
1736 fputs (".le", file
);
1739 fputs (".ge", file
);
1742 fputs (".lt", file
);
1745 fputs (".gt", file
);
1748 fputs (".ls", file
);
1751 fputs (".hs", file
);
1754 fputs (".lo", file
);
1757 fputs (".hi", file
);
1760 fputs (".ne", file
);
1763 fputs (".equ", file
);
1766 fputs (".leu", file
);
1769 fputs (".geu", file
);
1772 fputs (".ltu", file
);
1775 fputs (".gtu", file
);
1778 fputs (".nan", file
);
1781 fputs (".num", file
);
/* Type part of the comparison suffix: floating-point modes, equality
   tests and unsigned comparisons take the type from
   nvptx_ptx_type_from_mode; the other statement prints a signed type of
   the mode width.  */
1786 if (FLOAT_MODE_P (op_mode
)
1787 || x_code
== EQ
|| x_code
== NE
1788 || x_code
== GEU
|| x_code
== GTU
1789 || x_code
== LEU
|| x_code
== LTU
)
1790 fputs (nvptx_ptx_type_from_mode (op_mode
, true), file
);
1792 fprintf (file
, ".s%d", GET_MODE_BITSIZE (op_mode
));
/* Registers: hard registers by name, others as a numbered r register.  */
1803 if (HARD_REGISTER_P (x
))
1804 fprintf (file
, "%s", reg_names
[REGNO (x
)]);
1806 fprintf (file
, "%%r%d", REGNO (x
));
/* For a split-mode register printed without the f code, the original
   operand must be a SUBREG of non-split mode; its word index is appended
   after a dollar sign.  */
1807 if (code
!= 'f' && nvptx_split_reg_p (GET_MODE (x
)))
1809 gcc_assert (GET_CODE (orig_x
) == SUBREG
1810 && !nvptx_split_reg_p (GET_MODE (orig_x
)));
1811 fprintf (file
, "$%d", SUBREG_BYTE (orig_x
) / UNITS_PER_WORD
);
/* Memory reference: print the address with the mode of X.  */
1817 nvptx_print_address_operand (file
, XEXP (x
, 0), GET_MODE (x
));
1822 output_addr_const (file
, x
);
1828 /* We could use output_addr_const, but that can print things like
1829 "x-8", which breaks ptxas. Need to ensure it is output as
1831 nvptx_print_address_operand (file
, x
, VOIDmode
);
1836 REAL_VALUE_TYPE real
;
1837 REAL_VALUE_FROM_CONST_DOUBLE (real
, x
);
1838 real_to_target (vals
, &real
, GET_MODE (x
));
1839 vals
[0] &= 0xffffffff;
1840 vals
[1] &= 0xffffffff;
1841 if (GET_MODE (x
) == SFmode
)
1842 fprintf (file
, "0f%08lx", vals
[0]);
1844 fprintf (file
, "0d%08lx%08lx", vals
[1], vals
[0]);
1848 output_addr_const (file
, x
);
1853 /* Record replacement regs used to deal with subreg operands. */
/* Array of replacement registers with one slot per possible operand
   (MAX_RECOG_OPERANDS).  NOTE(review): the enclosing struct header and
   its other members are not visible in this listing; get_replacement
   uses members named mode, n_allocated and n_in_use.  */
1856 rtx replacement
[MAX_RECOG_OPERANDS
];
1862 /* Allocate or reuse a replacement in R and return the rtx. */
/* Slots below n_allocated already hold registers.  A new register of mode
   R->mode is created only when every allocated slot is in use; the
   function then hands out the next unused slot and advances n_in_use.  */
1865 get_replacement (struct reg_replace
*r
)
/* All allocated slots are taken: grow the pool by one fresh register.  */
1867 if (r
->n_allocated
== r
->n_in_use
)
1868 r
->replacement
[r
->n_allocated
++] = gen_reg_rtx (r
->mode
);
1869 return r
->replacement
[r
->n_in_use
++];
1872 /* Clean up subreg operands. In ptx assembly, everything is typed, and
1873 the presence of subregs would break the rules for most instructions.
1874 Replace them with a suitable new register of the right size, plus
1875 conversion copyin/copyout instructions. */
/* NOTE(review): the function header line, several guards and the choice
   of conversion code are not visible in this listing; the comments below
   describe only the statements that are shown.  */
/* One pool of replacement registers per integer mode.  */
1880 struct reg_replace qiregs
, hiregs
, siregs
, diregs
;
1881 rtx_insn
*insn
, *next
;
1883 /* We are freeing block_for_insn in the toplev to keep compatibility
1884 with old MDEP_REORGS that are not CFG based. Recompute it now. */
1885 compute_bb_for_insn ();
1887 df_clear_flags (DF_LR_RUN_DCE
);
1890 thread_prologue_and_epilogue_insns ();
/* Start with empty pools and fix the mode of each.  */
1892 qiregs
.n_allocated
= 0;
1893 hiregs
.n_allocated
= 0;
1894 siregs
.n_allocated
= 0;
1895 diregs
.n_allocated
= 0;
1896 qiregs
.mode
= QImode
;
1897 hiregs
.mode
= HImode
;
1898 siregs
.mode
= SImode
;
1899 diregs
.mode
= DImode
;
/* Walk every insn.  The successor is captured first because new insns
   are emitted before and after the current one inside the loop.  */
1901 for (insn
= get_insns (); insn
; insn
= next
)
1903 next
= NEXT_INSN (insn
);
/* Test for insns that are not processed: debug or non-insns, asm
   statements, and bare USE or CLOBBER patterns.  */
1904 if (!NONDEBUG_INSN_P (insn
)
1905 || asm_noperands (insn
) >= 0
1906 || GET_CODE (PATTERN (insn
)) == USE
1907 || GET_CODE (PATTERN (insn
)) == CLOBBER
)
/* Replacement registers are reused from one insn to the next, so the
   in-use counts restart at zero for each insn.  */
1909 qiregs
.n_in_use
= 0;
1910 hiregs
.n_in_use
= 0;
1911 siregs
.n_in_use
= 0;
1912 diregs
.n_in_use
= 0;
/* Fill recog_data with the operands of this insn and read its
   subregs_ok attribute.  */
1913 extract_insn (insn
);
1914 enum attr_subregs_ok s_ok
= get_attr_subregs_ok (insn
);
1915 for (int i
= 0; i
< recog_data
.n_operands
; i
++)
1917 rtx op
= recog_data
.operand
[i
];
/* Only SUBREG operands are of interest.  */
1918 if (GET_CODE (op
) != SUBREG
)
1921 rtx inner
= SUBREG_REG (op
);
1923 machine_mode outer_mode
= GET_MODE (op
);
1924 machine_mode inner_mode
= GET_MODE (inner
);
/* Part of a condition comparing inner and outer precision; presumably it
   lets some narrowing subregs through when s_ok permits.  */
1927 && (GET_MODE_PRECISION (inner_mode
)
1928 >= GET_MODE_PRECISION (outer_mode
)))
/* Replacement is only implemented for scalar integer outer modes; pick
   the pool that matches the outer mode.  */
1930 gcc_assert (SCALAR_INT_MODE_P (outer_mode
));
1931 struct reg_replace
*r
= (outer_mode
== QImode
? &qiregs
1932 : outer_mode
== HImode
? &hiregs
1933 : outer_mode
== SImode
? &siregs
1935 rtx new_reg
= get_replacement (r
);
/* Operand is read: emit a copy-in that converts the inner register to
   the outer mode before the insn.  */
1937 if (recog_data
.operand_type
[i
] != OP_OUT
)
1940 if (GET_MODE_PRECISION (inner_mode
)
1941 < GET_MODE_PRECISION (outer_mode
))
1946 rtx pat
= gen_rtx_SET (new_reg
,
1947 gen_rtx_fmt_e (code
, outer_mode
, inner
));
1948 emit_insn_before (pat
, insn
);
/* Operand is written: emit a copy-out that converts the new register
   back to the inner mode after the insn.  */
1951 if (recog_data
.operand_type
[i
] != OP_IN
)
1954 if (GET_MODE_PRECISION (inner_mode
)
1955 < GET_MODE_PRECISION (outer_mode
))
1960 rtx pat
= gen_rtx_SET (inner
,
1961 gen_rtx_fmt_e (code
, inner_mode
, new_reg
));
1962 emit_insn_after (pat
, insn
);
/* Substitute the new register for the SUBREG in the insn.  */
1964 validate_change (insn
, recog_data
.operand_loc
[i
], new_reg
, false);
/* Final sweep: every register numbered above LAST_VIRTUAL_REGISTER that
   has no sets and no references gets its regno_reg_rtx entry replaced by
   const0_rtx.  */
1968 int maxregs
= max_reg_num ();
1969 regstat_init_n_sets_and_refs ();
1971 for (int i
= LAST_VIRTUAL_REGISTER
+ 1; i
< maxregs
; i
++)
1972 if (REG_N_SETS (i
) == 0 && REG_N_REFS (i
) == 0)
1973 regno_reg_rtx
[i
] = const0_rtx
;
1974 regstat_free_n_sets_and_refs ();
1977 /* Handle a "kernel" attribute; arguments as in
1978 struct attribute_spec.handler. */
/* Rejects the attribute, by reporting an error and setting *NO_ADD_ATTRS,
   when it is applied to something other than a function or to a function
   whose return type is not void.  ARGS and FLAGS are unused.
   NOTE(review): the definition of decl and the return statement are not
   visible in this listing; decl is presumably *NODE.  */
1981 nvptx_handle_kernel_attribute (tree
*node
, tree name
, tree
ARG_UNUSED (args
),
1982 int ARG_UNUSED (flags
), bool *no_add_attrs
)
/* The attribute is only meaningful on function declarations.  */
1986 if (TREE_CODE (decl
) != FUNCTION_DECL
)
1988 error ("%qE attribute only applies to functions", name
);
1989 *no_add_attrs
= true;
/* The return type is compared by identity with void_type_node.  */
1992 else if (TREE_TYPE (TREE_TYPE (decl
)) != void_type_node
)
1994 error ("%qE attribute requires a void return type", name
);
1995 *no_add_attrs
= true;
2001 /* Table of valid machine attributes. */
/* Installed as TARGET_ATTRIBUTE_TABLE further down in this file.  */
2002 static const struct attribute_spec nvptx_attribute_table
[] =
2004 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
2005 affects_type_identity } */
/* The kernel attribute takes no arguments (min and max length 0),
   requires a decl, and is checked by nvptx_handle_kernel_attribute.  */
2006 { "kernel", 0, 0, true, false, false, nvptx_handle_kernel_attribute
, false },
/* Terminating entry with a null name.  */
2007 { NULL
, 0, 0, false, false, false, NULL
, false }
2010 /* Limit vector alignments to BIGGEST_ALIGNMENT. */
/* Returns the smaller of the size recorded in TYPE_SIZE of TYPE and
   BIGGEST_ALIGNMENT.  Installed as TARGET_VECTOR_ALIGNMENT further down
   in this file.  */
2012 static HOST_WIDE_INT
2013 nvptx_vector_alignment (const_tree type
)
/* Start from the type size as a signed host-wide integer.  */
2015 HOST_WIDE_INT align
= tree_to_shwi (TYPE_SIZE (type
));
2017 return MIN (align
, BIGGEST_ALIGNMENT
);
2020 /* Record a symbol for mkoffload to enter into the mapping table. */
/* Writes one comment line to asm_out_file, tagged VAR_MAP for variable
   decls and FUNC_MAP for everything else, followed by the assembler name
   of DECL.  */
2023 nvptx_record_offload_symbol (tree decl
)
2025 fprintf (asm_out_file
, "//:%s_MAP %s\n",
2026 TREE_CODE (decl
) == VAR_DECL
? "VAR" : "FUNC",
2027 IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl
)));
2030 /* Implement TARGET_ASM_FILE_START. Write the kinds of things ptxas expects
2031 at the start of a file. */
/* The preamble is bracketed by BEGIN PREAMBLE and END PREAMBLE marker
   comments and holds the version, target and address-size directives.  */
2034 nvptx_file_start (void)
2036 fputs ("// BEGIN PREAMBLE\n", asm_out_file
);
/* Version and target are fixed strings.  */
2037 fputs ("\t.version\t3.1\n", asm_out_file
);
2038 fputs ("\t.target\tsm_30\n", asm_out_file
);
/* The address size is the width of Pmode in bits.  */
2039 fprintf (asm_out_file
, "\t.address_size %d\n", GET_MODE_BITSIZE (Pmode
));
2040 fputs ("// END PREAMBLE\n", asm_out_file
);
2043 /* Write out the function declarations we've collected. */
/* Implements TARGET_ASM_FILE_END (see the hook table further down).
   NOTE(review): the declaration of decl is not visible in this
   listing.  */
2046 nvptx_file_end (void)
2048 hash_table
<tree_hasher
>::iterator iter
;
/* Record every decl still held in needed_fndecls_htab, passing true as
   the second argument of nvptx_record_fndecl.  */
2050 FOR_EACH_HASH_TABLE_ELEMENT (*needed_fndecls_htab
, decl
, tree
, iter
)
2051 nvptx_record_fndecl (decl
, true);
/* Flush the text accumulated in the func_decls string stream.  */
2052 fputs (func_decls
.str().c_str(), asm_out_file
);
/* Target hook table.  Each pair below first removes the default
   definition of a hook macro and then redefines it to the nvptx
   implementation or constant; TARGET_INITIALIZER at the end collects the
   resulting values into targetm.  */
2055 #undef TARGET_OPTION_OVERRIDE
2056 #define TARGET_OPTION_OVERRIDE nvptx_option_override
2058 #undef TARGET_ATTRIBUTE_TABLE
2059 #define TARGET_ATTRIBUTE_TABLE nvptx_attribute_table
2061 #undef TARGET_LEGITIMATE_ADDRESS_P
2062 #define TARGET_LEGITIMATE_ADDRESS_P nvptx_legitimate_address_p
2064 #undef TARGET_PROMOTE_FUNCTION_MODE
2065 #define TARGET_PROMOTE_FUNCTION_MODE nvptx_promote_function_mode
/* Argument passing and return value hooks.  */
2067 #undef TARGET_FUNCTION_ARG
2068 #define TARGET_FUNCTION_ARG nvptx_function_arg
2069 #undef TARGET_FUNCTION_INCOMING_ARG
2070 #define TARGET_FUNCTION_INCOMING_ARG nvptx_function_incoming_arg
2071 #undef TARGET_FUNCTION_ARG_ADVANCE
2072 #define TARGET_FUNCTION_ARG_ADVANCE nvptx_function_arg_advance
2073 #undef TARGET_FUNCTION_ARG_BOUNDARY
2074 #define TARGET_FUNCTION_ARG_BOUNDARY nvptx_function_arg_boundary
/* The round-boundary hook reuses nvptx_function_arg_boundary.  */
2075 #undef TARGET_FUNCTION_ARG_ROUND_BOUNDARY
2076 #define TARGET_FUNCTION_ARG_ROUND_BOUNDARY nvptx_function_arg_boundary
2077 #undef TARGET_PASS_BY_REFERENCE
2078 #define TARGET_PASS_BY_REFERENCE nvptx_pass_by_reference
2079 #undef TARGET_FUNCTION_VALUE_REGNO_P
2080 #define TARGET_FUNCTION_VALUE_REGNO_P nvptx_function_value_regno_p
2081 #undef TARGET_FUNCTION_VALUE
2082 #define TARGET_FUNCTION_VALUE nvptx_function_value
2083 #undef TARGET_LIBCALL_VALUE
2084 #define TARGET_LIBCALL_VALUE nvptx_libcall_value
2085 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
2086 #define TARGET_FUNCTION_OK_FOR_SIBCALL nvptx_function_ok_for_sibcall
2087 #undef TARGET_SPLIT_COMPLEX_ARG
2088 #define TARGET_SPLIT_COMPLEX_ARG hook_bool_const_tree_true
2089 #undef TARGET_RETURN_IN_MEMORY
2090 #define TARGET_RETURN_IN_MEMORY nvptx_return_in_memory
2091 #undef TARGET_OMIT_STRUCT_RETURN_REG
2092 #define TARGET_OMIT_STRUCT_RETURN_REG true
2093 #undef TARGET_STRICT_ARGUMENT_NAMING
2094 #define TARGET_STRICT_ARGUMENT_NAMING nvptx_strict_argument_naming
2095 #undef TARGET_STATIC_CHAIN
2096 #define TARGET_STATIC_CHAIN nvptx_static_chain
2098 #undef TARGET_CALL_ARGS
2099 #define TARGET_CALL_ARGS nvptx_call_args
2100 #undef TARGET_END_CALL_ARGS
2101 #define TARGET_END_CALL_ARGS nvptx_end_call_args
/* Assembly output hooks.  */
2103 #undef TARGET_ASM_FILE_START
2104 #define TARGET_ASM_FILE_START nvptx_file_start
2105 #undef TARGET_ASM_FILE_END
2106 #define TARGET_ASM_FILE_END nvptx_file_end
2107 #undef TARGET_ASM_GLOBALIZE_LABEL
2108 #define TARGET_ASM_GLOBALIZE_LABEL nvptx_globalize_label
2109 #undef TARGET_ASM_ASSEMBLE_UNDEFINED_DECL
2110 #define TARGET_ASM_ASSEMBLE_UNDEFINED_DECL nvptx_assemble_undefined_decl
2111 #undef TARGET_PRINT_OPERAND
2112 #define TARGET_PRINT_OPERAND nvptx_print_operand
2113 #undef TARGET_PRINT_OPERAND_ADDRESS
2114 #define TARGET_PRINT_OPERAND_ADDRESS nvptx_print_operand_address
2115 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
2116 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P nvptx_print_operand_punct_valid_p
2117 #undef TARGET_ASM_INTEGER
2118 #define TARGET_ASM_INTEGER nvptx_assemble_integer
2119 #undef TARGET_ASM_DECL_END
2120 #define TARGET_ASM_DECL_END nvptx_assemble_decl_end
2121 #undef TARGET_ASM_DECLARE_CONSTANT_NAME
2122 #define TARGET_ASM_DECLARE_CONSTANT_NAME nvptx_asm_declare_constant_name
2123 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
2124 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
2125 #undef TARGET_ASM_NEED_VAR_DECL_BEFORE_USE
2126 #define TARGET_ASM_NEED_VAR_DECL_BEFORE_USE true
/* Machine-dependent reorg pass and register allocation setting.  */
2128 #undef TARGET_MACHINE_DEPENDENT_REORG
2129 #define TARGET_MACHINE_DEPENDENT_REORG nvptx_reorg
2130 #undef TARGET_NO_REGISTER_ALLOCATION
2131 #define TARGET_NO_REGISTER_ALLOCATION true
2133 #undef TARGET_RECORD_OFFLOAD_SYMBOL
2134 #define TARGET_RECORD_OFFLOAD_SYMBOL nvptx_record_offload_symbol
2136 #undef TARGET_VECTOR_ALIGNMENT
2137 #define TARGET_VECTOR_ALIGNMENT nvptx_vector_alignment
/* The target vector itself, built from the hook macros above.  */
2139 struct gcc_target targetm
= TARGET_INITIALIZER
;
2141 #include "gt-nvptx.h"