libgomp: Make nvptx helper routines self-contained.
author Thomas Schwinge <thomas@codesourcery.com>
Tue, 17 Feb 2015 18:05:24 +0000 (19:05 +0100)
committer Thomas Schwinge <tschwinge@gcc.gnu.org>
Tue, 17 Feb 2015 18:05:24 +0000 (19:05 +0100)
libgomp/
* oacc-ptx.h (GOACC_INTERNAL_PTX): Add GOACC_tid, GOACC_ntid,
GOACC_ctaid, and GOACC_nctaid routines.

Co-Authored-By: Cesar Philippidis <cesar@codesourcery.com>
From-SVN: r220768

libgomp/ChangeLog
libgomp/oacc-ptx.h

index 6c24531744cea380cbbf2cd85fb9a5d8c5b02fb3..2c32d9e94fb7957410aac30e9227528165fcf4d5 100644 (file)
@@ -1,3 +1,9 @@
+2015-02-17  Thomas Schwinge  <thomas@codesourcery.com>
+           Cesar Philippidis  <cesar@codesourcery.com>
+
+       * oacc-ptx.h (GOACC_INTERNAL_PTX): Add GOACC_tid, GOACC_ntid,
+       GOACC_ctaid, and GOACC_nctaid routines.
+
 2015-02-11  Jakub Jelinek  <jakub@redhat.com>
 
        PR c/64824
index 13ff86fb4af34a6b6793028aa81650424ac3cd57..2419a460c72f3da619bea184378712694cb47c0b 100644 (file)
   ".version 3.1\n" \
   ".target sm_30\n" \
   ".address_size 64\n" \
+  ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1);\n" /* Forward declarations: each of these four routines takes one .u32 parameter and returns one .u32; their bodies follow later in this same PTX string. */ \
+  ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1);\n" \
+  ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1);\n" \
+  ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1);\n" \
   ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads;\n" \
   ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num;\n" \
   ".extern .func abort;\n" \
+  ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1)\n" /* GOACC_tid: returns %tid.x, %tid.y or %tid.z when the argument is 0, 1 or 2 respectively; any other argument reaches "call abort". */ \
+  "{\n" \
+  ".reg .u32 %ar1;\n" \
+  ".reg .u32 %retval;\n" \
+  ".reg .u64 %hr10;\n" /* Declared but never referenced in this routine. */ \
+  ".reg .u32 %r22;\n" \
+  ".reg .u32 %r23;\n" \
+  ".reg .u32 %r24;\n" \
+  ".reg .u32 %r25;\n" \
+  ".reg .u32 %r26;\n" \
+  ".reg .u32 %r27;\n" \
+  ".reg .u32 %r28;\n" \
+  ".reg .u32 %r29;\n" \
+  ".reg .pred %r30;\n" \
+  ".reg .u32 %r31;\n" \
+  ".reg .pred %r32;\n" \
+  ".reg .u32 %r33;\n" \
+  ".reg .pred %r34;\n" \
+  ".local .align 8 .b8 %frame[4];\n" /* 4-byte local slot; the argument is stored here and immediately reloaded into %r28. */ \
+  "ld.param.u32 %ar1,[%in_ar1];\n" \
+  "mov.u32 %r27,%ar1;\n" \
+  "st.local.u32 [%frame],%r27;\n" \
+  "ld.local.u32 %r28,[%frame];\n" \
+  "mov.u32 %r29,1;\n"                                                  \
+  "setp.eq.u32 %r30,%r28,%r29;\n"                                      \
+  "@%r30 bra $L4;\n" /* argument == 1: take the %tid.y path */ \
+  "mov.u32 %r31,2;\n"                                                  \
+  "setp.eq.u32 %r32,%r28,%r31;\n"                                      \
+  "@%r32 bra $L5;\n" /* argument == 2: take the %tid.z path */ \
+  "mov.u32 %r33,0;\n"                                                  \
+  "setp.eq.u32 %r34,%r28,%r33;\n"                                      \
+  "@!%r34 bra $L8;\n" /* argument is none of 0, 1, 2: go to the abort path */ \
+  "mov.u32 %r23,%tid.x;\n" /* argument == 0 (fall-through case) */ \
+  "mov.u32 %r22,%r23;\n"                                               \
+  "bra $L7;\n"                                                         \
+  "$L4:\n"                                                             \
+  "mov.u32 %r24,%tid.y;\n"                                             \
+  "mov.u32 %r22,%r24;\n"                                               \
+  "bra $L7;\n"                                                         \
+  "$L5:\n"                                                             \
+  "mov.u32 %r25,%tid.z;\n"                                             \
+  "mov.u32 %r22,%r25;\n"                                               \
+  "bra $L7;\n"                                                         \
+  "$L8:\n"                                                             \
+  "{\n"                                                                        \
+  "{\n"                                                                        \
+  "call abort;\n" /* NOTE(review): control would fall through to $L7 with %r22 unwritten on this path; presumably abort never returns -- confirm. */ \
+  "}\n"                                                                        \
+  "}\n"                                                                        \
+  "$L7:\n" /* Common exit: the selected value in %r22 is copied out through %out_retval. */ \
+  "mov.u32 %r26,%r22;\n"                                               \
+  "mov.u32 %retval,%r26;\n"                                            \
+  "st.param.u32 [%out_retval],%retval;\n"                              \
+  "ret;\n"                                                             \
+  "}\n"                                                                        \
+  ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1)\n" /* GOACC_ntid: returns %ntid.x, %ntid.y or %ntid.z when the argument is 0, 1 or 2 respectively; any other argument reaches "call abort". */ \
+  "{\n"                                                                        \
+  ".reg .u32 %ar1;\n"                                                  \
+  ".reg .u32 %retval;\n"                                               \
+  ".reg .u64 %hr10;\n" /* Declared but never referenced in this routine. */ \
+  ".reg .u32 %r22;\n"                                                  \
+  ".reg .u32 %r23;\n"                                                  \
+  ".reg .u32 %r24;\n"                                                  \
+  ".reg .u32 %r25;\n"                                                  \
+  ".reg .u32 %r26;\n"                                                  \
+  ".reg .u32 %r27;\n"                                                  \
+  ".reg .u32 %r28;\n"                                                  \
+  ".reg .u32 %r29;\n"                                                  \
+  ".reg .pred %r30;\n"                                                 \
+  ".reg .u32 %r31;\n"                                                  \
+  ".reg .pred %r32;\n"                                                 \
+  ".reg .u32 %r33;\n"                                                  \
+  ".reg .pred %r34;\n"                                                 \
+  ".local .align 8 .b8 %frame[4];\n" /* 4-byte local slot; the argument is stored here and immediately reloaded into %r28. */ \
+  "ld.param.u32 %ar1,[%in_ar1];\n"                                     \
+  "mov.u32 %r27,%ar1;\n"                                               \
+  "st.local.u32 [%frame],%r27;\n"                                      \
+  "ld.local.u32 %r28,[%frame];\n"                                      \
+  "mov.u32 %r29,1;\n"                                                  \
+  "setp.eq.u32 %r30,%r28,%r29;\n"                                      \
+  "@%r30 bra $L11;\n" /* argument == 1: take the %ntid.y path */ \
+  "mov.u32 %r31,2;\n"                                                  \
+  "setp.eq.u32 %r32,%r28,%r31;\n"                                      \
+  "@%r32 bra $L12;\n" /* argument == 2: take the %ntid.z path */ \
+  "mov.u32 %r33,0;\n"                                                  \
+  "setp.eq.u32 %r34,%r28,%r33;\n"                                      \
+  "@!%r34 bra $L15;\n" /* argument is none of 0, 1, 2: go to the abort path */ \
+  "mov.u32 %r23,%ntid.x;\n" /* argument == 0 (fall-through case) */ \
+  "mov.u32 %r22,%r23;\n"                                               \
+  "bra $L14;\n"                                                                \
+  "$L11:\n"                                                            \
+  "mov.u32 %r24,%ntid.y;\n"                                            \
+  "mov.u32 %r22,%r24;\n"                                               \
+  "bra $L14;\n"                                                                \
+  "$L12:\n"                                                            \
+  "mov.u32 %r25,%ntid.z;\n"                                            \
+  "mov.u32 %r22,%r25;\n"                                               \
+  "bra $L14;\n"                                                                \
+  "$L15:\n"                                                            \
+  "{\n"                                                                        \
+  "{\n"                                                                        \
+  "call abort;\n" /* NOTE(review): control would fall through to $L14 with %r22 unwritten on this path; presumably abort never returns -- confirm. */ \
+  "}\n"                                                                        \
+  "}\n"                                                                        \
+  "$L14:\n" /* Common exit: the selected value in %r22 is copied out through %out_retval. */ \
+  "mov.u32 %r26,%r22;\n"                                               \
+  "mov.u32 %retval,%r26;\n"                                            \
+  "st.param.u32 [%out_retval],%retval;\n"                              \
+  "ret;\n"                                                             \
+  "}\n"                                                                        \
+  ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1)\n" /* GOACC_ctaid: returns %ctaid.x, %ctaid.y or %ctaid.z when the argument is 0, 1 or 2 respectively; any other argument reaches "call abort". */ \
+  "{\n"                                                                        \
+  ".reg .u32 %ar1;\n"                                                  \
+  ".reg .u32 %retval;\n"                                               \
+  ".reg .u64 %hr10;\n" /* Declared but never referenced in this routine. */ \
+  ".reg .u32 %r22;\n"                                                  \
+  ".reg .u32 %r23;\n"                                                  \
+  ".reg .u32 %r24;\n"                                                  \
+  ".reg .u32 %r25;\n"                                                  \
+  ".reg .u32 %r26;\n"                                                  \
+  ".reg .u32 %r27;\n"                                                  \
+  ".reg .u32 %r28;\n"                                                  \
+  ".reg .u32 %r29;\n"                                                  \
+  ".reg .pred %r30;\n"                                                 \
+  ".reg .u32 %r31;\n"                                                  \
+  ".reg .pred %r32;\n"                                                 \
+  ".reg .u32 %r33;\n"                                                  \
+  ".reg .pred %r34;\n"                                                 \
+  ".local .align 8 .b8 %frame[4];\n" /* 4-byte local slot; the argument is stored here and immediately reloaded into %r28. */ \
+  "ld.param.u32 %ar1,[%in_ar1];\n"                                     \
+  "mov.u32 %r27,%ar1;\n"                                               \
+  "st.local.u32 [%frame],%r27;\n"                                      \
+  "ld.local.u32 %r28,[%frame];\n"                                      \
+  "mov.u32 %r29,1;\n"                                                  \
+  "setp.eq.u32 %r30,%r28,%r29;\n"                                      \
+  "@%r30 bra $L18;\n" /* argument == 1: take the %ctaid.y path */ \
+  "mov.u32 %r31,2;\n"                                                  \
+  "setp.eq.u32 %r32,%r28,%r31;\n"                                      \
+  "@%r32 bra $L19;\n" /* argument == 2: take the %ctaid.z path */ \
+  "mov.u32 %r33,0;\n"                                                  \
+  "setp.eq.u32 %r34,%r28,%r33;\n"                                      \
+  "@!%r34 bra $L22;\n" /* argument is none of 0, 1, 2: go to the abort path */ \
+  "mov.u32 %r23,%ctaid.x;\n" /* argument == 0 (fall-through case) */ \
+  "mov.u32 %r22,%r23;\n"                                               \
+  "bra $L21;\n"                                                                \
+  "$L18:\n"                                                            \
+  "mov.u32 %r24,%ctaid.y;\n"                                           \
+  "mov.u32 %r22,%r24;\n"                                               \
+  "bra $L21;\n"                                                                \
+  "$L19:\n"                                                            \
+  "mov.u32 %r25,%ctaid.z;\n"                                           \
+  "mov.u32 %r22,%r25;\n"                                               \
+  "bra $L21;\n"                                                                \
+  "$L22:\n"                                                            \
+  "{\n"                                                                        \
+  "{\n"                                                                        \
+  "call abort;\n" /* NOTE(review): control would fall through to $L21 with %r22 unwritten on this path; presumably abort never returns -- confirm. */ \
+  "}\n"                                                                        \
+  "}\n"                                                                        \
+  "$L21:\n" /* Common exit: the selected value in %r22 is copied out through %out_retval. */ \
+  "mov.u32 %r26,%r22;\n"                                               \
+  "mov.u32 %retval,%r26;\n"                                            \
+  "st.param.u32 [%out_retval],%retval;\n"                              \
+  "ret;\n"                                                             \
+  "}\n"                                                                        \
+  ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1)\n" /* GOACC_nctaid: returns %nctaid.x, %nctaid.y or %nctaid.z when the argument is 0, 1 or 2 respectively; any other argument reaches "call abort". */ \
+  "{\n"                                                                        \
+  ".reg .u32 %ar1;\n"                                                  \
+  ".reg .u32 %retval;\n"                                               \
+  ".reg .u64 %hr10;\n" /* Declared but never referenced in this routine. */ \
+  ".reg .u32 %r22;\n"                                                  \
+  ".reg .u32 %r23;\n"                                                  \
+  ".reg .u32 %r24;\n"                                                  \
+  ".reg .u32 %r25;\n"                                                  \
+  ".reg .u32 %r26;\n"                                                  \
+  ".reg .u32 %r27;\n"                                                  \
+  ".reg .u32 %r28;\n"                                                  \
+  ".reg .u32 %r29;\n"                                                  \
+  ".reg .pred %r30;\n"                                                 \
+  ".reg .u32 %r31;\n"                                                  \
+  ".reg .pred %r32;\n"                                                 \
+  ".reg .u32 %r33;\n"                                                  \
+  ".reg .pred %r34;\n"                                                 \
+  ".local .align 8 .b8 %frame[4];\n" /* 4-byte local slot; the argument is stored here and immediately reloaded into %r28. */ \
+  "ld.param.u32 %ar1,[%in_ar1];\n"                                     \
+  "mov.u32 %r27,%ar1;\n"                                               \
+  "st.local.u32 [%frame],%r27;\n"                                      \
+  "ld.local.u32 %r28,[%frame];\n"                                      \
+  "mov.u32 %r29,1;\n"                                                  \
+  "setp.eq.u32 %r30,%r28,%r29;\n"                                      \
+  "@%r30 bra $L25;\n" /* argument == 1: take the %nctaid.y path */ \
+  "mov.u32 %r31,2;\n"                                                  \
+  "setp.eq.u32 %r32,%r28,%r31;\n"                                      \
+  "@%r32 bra $L26;\n" /* argument == 2: take the %nctaid.z path */ \
+  "mov.u32 %r33,0;\n"                                                  \
+  "setp.eq.u32 %r34,%r28,%r33;\n"                                      \
+  "@!%r34 bra $L29;\n" /* argument is none of 0, 1, 2: go to the abort path */ \
+  "mov.u32 %r23,%nctaid.x;\n" /* argument == 0 (fall-through case) */ \
+  "mov.u32 %r22,%r23;\n"                                               \
+  "bra $L28;\n"                                                                \
+  "$L25:\n"                                                            \
+  "mov.u32 %r24,%nctaid.y;\n"                                          \
+  "mov.u32 %r22,%r24;\n"                                               \
+  "bra $L28;\n"                                                                \
+  "$L26:\n"                                                            \
+  "mov.u32 %r25,%nctaid.z;\n"                                          \
+  "mov.u32 %r22,%r25;\n"                                               \
+  "bra $L28;\n"                                                                \
+  "$L29:\n"                                                            \
+  "{\n"                                                                        \
+  "{\n"                                                                        \
+  "call abort;\n" /* NOTE(review): control would fall through to $L28 with %r22 unwritten on this path; presumably abort never returns -- confirm. */ \
+  "}\n"                                                                        \
+  "}\n"                                                                        \
+  "$L28:\n" /* Common exit: the selected value in %r22 is copied out through %out_retval. */ \
+  "mov.u32 %r26,%r22;\n"                                               \
+  "mov.u32 %retval,%r26;\n"                                            \
+  "st.param.u32 [%out_retval],%retval;\n"                              \
+  "ret;\n"                                                             \
+  "}\n"                                                                        \
   ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads\n"   \
   "{\n"                                                                        \
   ".reg .u32 %retval;\n"                                               \