From 113020dc598b57e0f2a73bf1516e5c83668aca21 Mon Sep 17 00:00:00 2001 From: Thomas Schwinge Date: Fri, 2 Oct 2015 21:43:41 +0200 Subject: [PATCH] nvptx offloading linking gcc/ * config/nvptx/mkoffload.c (Kind, Vis): Remove enums. (Token, Stmt): Remove structs. (decls, vars, fns): Remove variables. (alloc_comment, append_stmt, is_keyword): Remove macros. (tokenize, write_token, write_tokens, alloc_stmt, rev_stmts) (write_stmt, write_stmts, parse_insn, parse_list_nosemi) (parse_init, parse_file): Remove functions. (read_file): Accept a pointer to a length and store into it. (process): Don't try to parse the input file, just write it out as a string, but looking for maps. Also write out the length. (main): Don't use "-S" to compile PTX code. libgomp/ * oacc-ptx.h: Remove file, moving its content into... * config/nvptx/fortran.c: ... here... * config/nvptx/oacc-init.c: ..., here... * config/nvptx/oacc-parallel.c: ..., and here. * config/nvptx/openacc.f90: New file. * plugin/plugin-nvptx.c: Don't include "oacc-ptx.h". (link_ptx): Don't link in predefined bits of PTX code. Co-Authored-By: Bernd Schmidt From-SVN: r228418 --- gcc/ChangeLog | 15 + gcc/config/nvptx/mkoffload.c | 677 ++------------------------- libgomp/ChangeLog | 10 + libgomp/config/nvptx/fortran.c | 40 ++ libgomp/config/nvptx/oacc-init.c | 42 ++ libgomp/config/nvptx/oacc-parallel.c | 358 ++++++++++++++ libgomp/config/nvptx/openacc.f90 | 102 ++++ libgomp/oacc-init.c | 6 +- libgomp/oacc-ptx.h | 426 ----------------- libgomp/plugin/plugin-nvptx.c | 30 -- 10 files changed, 617 insertions(+), 1089 deletions(-) create mode 100644 libgomp/config/nvptx/openacc.f90 delete mode 100644 libgomp/oacc-ptx.h diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 6a0e10297d1..d1235bd6718 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,18 @@ +2015-10-02 Thomas Schwinge + Bernd Schmidt + + * config/nvptx/mkoffload.c (Kind, Vis): Remove enums. + (Token, Stmt): Remove structs. + (decls, vars, fns): Remove variables. 
+ (alloc_comment, append_stmt, is_keyword): Remove macros. + (tokenize, write_token, write_tokens, alloc_stmt, rev_stmts) + (write_stmt, write_stmts, parse_insn, parse_list_nosemi) + (parse_init, parse_file): Remove functions. + (read_file): Accept a pointer to a length and store into it. + (process): Don't try to parse the input file, just write it out as + a string, but looking for maps. Also write out the length. + (main): Don't use "-S" to compile PTX code. + 2015-10-02 Jeff Law * tree-ssa-dom.c (optimize_stmt): Note when loop structures need diff --git a/gcc/config/nvptx/mkoffload.c b/gcc/config/nvptx/mkoffload.c index 69eb4eac415..ff538e2d572 100644 --- a/gcc/config/nvptx/mkoffload.c +++ b/gcc/config/nvptx/mkoffload.c @@ -41,84 +41,12 @@ const char tool_name[] = "nvptx mkoffload"; #define COMMENT_PREFIX "#" -typedef enum Kind -{ - /* 0-ff used for single char tokens */ - K_symbol = 0x100, /* a symbol */ - K_label, /* a label defn (i.e. symbol:) */ - K_ident, /* other ident */ - K_dotted, /* dotted identifier */ - K_number, - K_string, - K_comment -} Kind; - -typedef struct Token -{ - unsigned short kind : 12; - unsigned short space : 1; /* preceded by space */ - unsigned short end : 1; /* succeeded by end of line */ - /* Length of token */ - unsigned short len; - - /* Token itself */ - char const *ptr; -} Token; - -/* statement info */ -typedef enum Vis -{ - V_dot = 0, /* random pseudo */ - V_var = 1, /* var decl/defn */ - V_func = 2, /* func decl/defn */ - V_insn = 3, /* random insn */ - V_label = 4, /* label defn */ - V_comment = 5, - V_pred = 6, /* predicate */ - V_mask = 0x7, - V_global = 0x08, /* globalize */ - V_weak = 0x10, /* weakly globalize */ - V_no_eol = 0x20, /* no end of line */ - V_prefix_comment = 0x40 /* prefixed comment */ -} Vis; - -typedef struct Stmt -{ - struct Stmt *next; - Token *tokens; - unsigned char vis; - unsigned len : 12; - unsigned sym : 12; -} Stmt; - struct id_map { id_map *next; char *ptx_name; }; -static const char 
*read_file (FILE *); -static Token *tokenize (const char *); - -static void write_token (FILE *, const Token *); -static void write_tokens (FILE *, const Token *, unsigned, int); - -static Stmt *alloc_stmt (unsigned, Token *, Token *, const Token *); -#define alloc_comment(S,E) alloc_stmt (V_comment, S, E, 0) -#define append_stmt(V, S) ((S)->next = *(V), *(V) = (S)) -static Stmt *rev_stmts (Stmt *); -static void write_stmt (FILE *, const Stmt *); -static void write_stmts (FILE *, const Stmt *); - -static Token *parse_insn (Token *); -static Token *parse_list_nosemi (Token *); -static Token *parse_init (Token *); -static Token *parse_file (Token *); - -static Stmt *decls; -static Stmt *vars; -static Stmt *fns; - static id_map *func_ids, **funcs_tail = &func_ids; static id_map *var_ids, **vars_tail = &var_ids; @@ -183,7 +111,7 @@ record_id (const char *p1, id_map ***where) remember, there could be a NUL in the file itself. */ static const char * -read_file (FILE *stream) +read_file (FILE *stream, size_t *plen) { size_t alloc = 16384; size_t base = 0; @@ -213,557 +141,10 @@ read_file (FILE *stream) } } buffer[base] = 0; + *plen = base; return buffer; } -/* Read a token, advancing ptr. - If we read a comment, append it to the comments block. */ - -static Token * -tokenize (const char *ptr) -{ - unsigned alloc = 1000; - unsigned num = 0; - Token *toks = XNEWVEC (Token, alloc); - int in_comment = 0; - int not_comment = 0; - - for (;; num++) - { - const char *base; - unsigned kind; - int ws = 0; - int eol = 0; - - again: - base = ptr; - if (in_comment) - goto block_comment; - switch (kind = *ptr++) - { - default: - break; - - case '\n': - eol = 1; - /* Fall through */ - case ' ': - case '\t': - case '\r': - case '\v': - /* White space */ - ws = not_comment; - goto again; - - case '/': - { - if (*ptr == '/') - { - /* line comment. 
Do not include trailing \n */ - base += 2; - for (; *ptr; ptr++) - if (*ptr == '\n') - break; - kind = K_comment; - } - else if (*ptr == '*') - { - /* block comment */ - base += 2; - ptr++; - - block_comment: - eol = in_comment; - in_comment = 1; - for (; *ptr; ptr++) - { - if (*ptr == '\n') - { - ptr++; - break; - } - if (ptr[0] == '*' && ptr[1] == '/') - { - in_comment = 2; - ptr += 2; - break; - } - } - kind = K_comment; - } - else - break; - } - break; - - case '"': - /* quoted string */ - kind = K_string; - while (*ptr) - if (*ptr == '"') - { - ptr++; - break; - } - else if (*ptr++ == '\\') - ptr++; - break; - - case '.': - if (*ptr < '0' || *ptr > '9') - { - kind = K_dotted; - ws = not_comment; - goto ident; - } - /* FALLTHROUGH */ - case '0'...'9': - kind = K_number; - goto ident; - break; - - case '$': /* local labels. */ - case '%': /* register names, pseudoes etc */ - kind = K_ident; - goto ident; - - case 'a'...'z': - case 'A'...'Z': - case '_': - kind = K_symbol; /* possible symbol name */ - ident: - for (; *ptr; ptr++) - { - if (*ptr >= 'A' && *ptr <= 'Z') - continue; - if (*ptr >= 'a' && *ptr <= 'z') - continue; - if (*ptr >= '0' && *ptr <= '9') - continue; - if (*ptr == '_' || *ptr == '$') - continue; - if (*ptr == '.' && kind != K_dotted) - /* Idents starting with a dot, cannot have internal dots. */ - continue; - if ((*ptr == '+' || *ptr == '-') - && kind == K_number - && (ptr[-1] == 'e' || ptr[-1] == 'E' - || ptr[-1] == 'p' || ptr[-1] == 'P')) - /* exponent */ - continue; - break; - } - if (*ptr == ':') - { - ptr++; - kind = K_label; - } - break; - } - - if (alloc == num) - { - alloc *= 2; - toks = XRESIZEVEC (Token, toks, alloc); - } - Token *tok = toks + num; - - tok->kind = kind; - tok->space = ws; - tok->end = 0; - tok->ptr = base; - tok->len = ptr - base - in_comment; - in_comment &= 1; - not_comment = kind != K_comment; - if (eol && num) - tok[-1].end = 1; - if (!kind) - break; - } - - return toks; -} - -/* Write an encoded token. 
*/ - -static void -write_token (FILE *out, Token const *tok) -{ - if (tok->space) - fputc (' ', out); - - switch (tok->kind) - { - case K_string: - { - const char *c = tok->ptr + 1; - size_t len = tok->len - 2; - - fputs ("\\\"", out); - while (len) - { - const char *bs = (const char *)memchr (c, '\\', len); - size_t l = bs ? bs - c : len; - - fprintf (out, "%.*s", (int)l, c); - len -= l; - c += l; - if (bs) - { - fputs ("\\\\", out); - len--, c++; - } - } - fputs ("\\\"", out); - } - break; - - default: - /* All other tokens shouldn't have anything magic in them */ - fprintf (out, "%.*s", tok->len, tok->ptr); - break; - } - if (tok->end) - fputs ("\\n", out); -} - -static void -write_tokens (FILE *out, Token const *toks, unsigned len, int spc) -{ - fputs ("\t\"", out); - for (; len--; toks++) - write_token (out, toks); - if (spc) - fputs (" ", out); - fputs ("\"", out); -} - -static Stmt * -alloc_stmt (unsigned vis, Token *tokens, Token *end, Token const *sym) -{ - static unsigned alloc = 0; - static Stmt *heap = 0; - - if (!alloc) - { - alloc = 1000; - heap = XNEWVEC (Stmt, alloc); - } - - Stmt *stmt = heap++; - alloc--; - - tokens->space = 0; - stmt->next = 0; - stmt->vis = vis; - stmt->tokens = tokens; - stmt->len = end - tokens; - stmt->sym = sym ? sym - tokens : ~0; - - return stmt; -} - -static Stmt * -rev_stmts (Stmt *stmt) -{ - Stmt *prev = 0; - Stmt *next; - - while (stmt) - { - next = stmt->next; - stmt->next = prev; - prev = stmt; - stmt = next; - } - - return prev; -} - -static void -write_stmt (FILE *out, const Stmt *stmt) -{ - if ((stmt->vis & V_mask) != V_comment) - { - write_tokens (out, stmt->tokens, stmt->len, - (stmt->vis & V_mask) == V_pred); - fputs (stmt->vis & V_no_eol ? 
"\t" : "\n", out); - } -} - -static void -write_stmts (FILE *out, const Stmt *stmts) -{ - for (; stmts; stmts = stmts->next) - write_stmt (out, stmts); -} - -static Token * -parse_insn (Token *tok) -{ - unsigned depth = 0; - - do - { - Stmt *stmt; - Token *sym = 0; - unsigned s = V_insn; - Token *start = tok; - - switch (tok++->kind) - { - case K_comment: - while (tok->kind == K_comment) - tok++; - stmt = alloc_comment (start, tok); - append_stmt (&fns, stmt); - continue; - - case '{': - depth++; - break; - - case '}': - depth--; - break; - - case K_label: - if (tok[-1].ptr[0] != '$') - sym = tok - 1; - tok[-1].end = 1; - s = V_label; - break; - - case '@': - tok->space = 0; - if (tok->kind == '!') - tok++; - if (tok->kind == K_symbol) - sym = tok; - tok++; - s = V_pred; - break; - - default: - for (; tok->kind != ';'; tok++) - { - if (tok->kind == ',') - tok[1].space = 0; - else if (tok->kind == K_symbol) - sym = tok; - } - tok++->end = 1; - break; - } - - stmt = alloc_stmt (s, start, tok, sym); - append_stmt (&fns, stmt); - - if (!tok[-1].end && tok[0].kind == K_comment) - { - stmt->vis |= V_no_eol; - stmt = alloc_comment (tok, tok + 1); - append_stmt (&fns, stmt); - tok++; - } - } - while (depth); - - return tok; -} - -/* comma separated list of tokens */ - -static Token * -parse_list_nosemi (Token *tok) -{ - Token *start = tok; - - do - if (!(++tok)->kind) - break; - while ((++tok)->kind == ','); - - tok[-1].end = 1; - Stmt *stmt = alloc_stmt (V_dot, start, tok, 0); - append_stmt (&decls, stmt); - - return tok; -} - -#define is_keyword(T,S) \ - (sizeof (S) == (T)->len && !memcmp ((T)->ptr + 1, (S), (T)->len - 1)) - -static Token * -parse_init (Token *tok) -{ - for (;;) - { - Token *start = tok; - Token const *sym = 0; - Stmt *stmt; - - if (tok->kind == K_comment) - { - while (tok->kind == K_comment) - tok++; - stmt = alloc_comment (start, tok); - append_stmt (&vars, stmt); - start = tok; - } - - if (tok->kind == '{') - tok[1].space = 0; - for (; tok->kind != 
',' && tok->kind != ';'; tok++) - if (tok->kind == K_symbol) - sym = tok; - tok[1].space = 0; - int end = tok++->kind == ';'; - stmt = alloc_stmt (V_insn, start, tok, sym); - append_stmt (&vars, stmt); - if (!tok[-1].end && tok->kind == K_comment) - { - stmt->vis |= V_no_eol; - stmt = alloc_comment (tok, tok + 1); - append_stmt (&vars, stmt); - tok++; - } - if (end) - break; - } - return tok; -} - -static Token * -parse_file (Token *tok) -{ - Stmt *comment = 0; - - if (tok->kind == K_comment) - { - Token *start = tok; - - while (tok->kind == K_comment) - { - if (strncmp (tok->ptr, ":VAR_MAP ", 9) == 0) - record_id (tok->ptr + 9, &vars_tail); - if (strncmp (tok->ptr, ":FUNC_MAP ", 10) == 0) - record_id (tok->ptr + 10, &funcs_tail); - tok++; - } - comment = alloc_comment (start, tok); - comment->vis |= V_prefix_comment; - } - - if (tok->kind == K_dotted) - { - if (is_keyword (tok, "version") - || is_keyword (tok, "target") - || is_keyword (tok, "address_size")) - { - if (comment) - append_stmt (&decls, comment); - tok = parse_list_nosemi (tok); - } - else - { - unsigned vis = 0; - const Token *def = 0; - unsigned is_decl = 0; - Token *start; - - for (start = tok; - tok->kind && tok->kind != '=' && tok->kind != K_comment - && tok->kind != '{' && tok->kind != ';'; tok++) - { - if (is_keyword (tok, "global") - || is_keyword (tok, "const")) - vis |= V_var; - else if (is_keyword (tok, "func") - || is_keyword (tok, "entry")) - vis |= V_func; - else if (is_keyword (tok, "visible")) - vis |= V_global; - else if (is_keyword (tok, "extern")) - is_decl = 1; - else if (is_keyword (tok, "weak")) - vis |= V_weak; - if (tok->kind == '(') - { - tok[1].space = 0; - tok[0].space = 1; - } - else if (tok->kind == ')' && tok[1].kind != ';') - tok[1].space = 1; - - if (tok->kind == K_symbol) - def = tok; - } - - if (!tok->kind) - { - /* end of file */ - if (comment) - append_stmt (&fns, comment); - } - else if (tok->kind == '{' - || tok->kind == K_comment) - { - /* function defn */ - Stmt 
*stmt = alloc_stmt (vis, start, tok, def); - if (comment) - { - append_stmt (&fns, comment); - stmt->vis |= V_prefix_comment; - } - append_stmt (&fns, stmt); - tok = parse_insn (tok); - } - else - { - int assign = tok->kind == '='; - - tok++->end = 1; - if ((vis & V_mask) == V_var && !is_decl) - { - /* variable */ - Stmt *stmt = alloc_stmt (vis, start, tok, def); - if (comment) - { - append_stmt (&vars, comment); - stmt->vis |= V_prefix_comment; - } - append_stmt (&vars, stmt); - if (assign) - tok = parse_init (tok); - } - else - { - /* declaration */ - Stmt *stmt = alloc_stmt (vis, start, tok, 0); - if (comment) - { - append_stmt (&decls, comment); - stmt->vis |= V_prefix_comment; - } - append_stmt (&decls, stmt); - } - } - } - } - else - { - /* Something strange. Ignore it. */ - if (comment) - append_stmt (&fns, comment); - - do - tok++; - while (tok->kind && !tok->end); - } - return tok; -} - /* Parse STR, saving found tokens into PVALUES and return their number. Tokens are assumed to be delimited by ':'. */ static unsigned @@ -839,22 +220,55 @@ access_check (const char *name, int mode) static void process (FILE *in, FILE *out) { - const char *input = read_file (in); - Token *tok = tokenize (input); + size_t len = 0; + const char *input = read_file (in, &len); const char *comma; id_map const *id; unsigned obj_count = 0; unsigned ix; - do - tok = parse_file (tok); - while (tok->kind); + /* Dump out char arrays for each PTX object file. These are + terminated by a NUL. */ + for (size_t i = 0; i != len;) + { + char c; - fprintf (out, "static const char ptx_code_%u[] = \n", obj_count++); - write_stmts (out, rev_stmts (decls)); - write_stmts (out, rev_stmts (vars)); - write_stmts (out, rev_stmts (fns)); - fprintf (out, ";\n\n"); + fprintf (out, "static const char ptx_code_%u[] =\n\t\"", obj_count++); + while ((c = input[i++])) + { + switch (c) + { + case '\r': + continue; + case '\n': + fprintf (out, "\\n\"\n\t\""); + /* Look for mappings on subsequent lines. 
*/ + while (strncmp (input + i, "//:", 3) == 0) + { + i += 3; + + if (strncmp (input + i, "VAR_MAP ", 8) == 0) + record_id (input + i + 8, &vars_tail); + else if (strncmp (input + i, "FUNC_MAP ", 9) == 0) + record_id (input + i + 9, &funcs_tail); + else + abort (); + /* Skip to next line. */ + while (input[i++] != '\n') + continue; + } + continue; + case '"': + case '\\': + putc ('\\', out); + break; + default: + break; + } + putc (c, out); + } + fprintf (out, "\";\n\n"); + } /* Dump out array of pointers to ptx object strings. */ fprintf (out, "static const struct ptx_obj {\n" @@ -1068,7 +482,6 @@ main (int argc, char **argv) default: gcc_unreachable (); } - obstack_ptr_grow (&argv_obstack, "-S"); for (int ix = 1; ix != argc; ix++) { diff --git a/libgomp/ChangeLog b/libgomp/ChangeLog index b38234b8788..191f21fd4e0 100644 --- a/libgomp/ChangeLog +++ b/libgomp/ChangeLog @@ -1,3 +1,13 @@ +2015-10-02 Thomas Schwinge + + * oacc-ptx.h: Remove file, moving its content into... + * config/nvptx/fortran.c: ... here... + * config/nvptx/oacc-init.c: ..., here... + * config/nvptx/oacc-parallel.c: ..., and here. + * config/nvptx/openacc.f90: New file. + * plugin/plugin-nvptx.c: Don't include "oacc-ptx.h". + (link_ptx): Don't link in predefined bits of PTX code. + 2015-09-30 Nathan Sidwell Bernd Schmidt diff --git a/libgomp/config/nvptx/fortran.c b/libgomp/config/nvptx/fortran.c index e69de29bb2d..58ca7905a76 100644 --- a/libgomp/config/nvptx/fortran.c +++ b/libgomp/config/nvptx/fortran.c @@ -0,0 +1,40 @@ +/* OpenACC Runtime Fortran wrapper routines + + Copyright (C) 2014-2015 Free Software Foundation, Inc. + + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. 
+ + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +/* Temporary hack; this will be provided by libgfortran. */ + +extern void _gfortran_abort (void); + +__asm__ ("// BEGIN GLOBAL FUNCTION DECL: _gfortran_abort\n" + ".visible .func _gfortran_abort;\n" + "// BEGIN GLOBAL FUNCTION DEF: _gfortran_abort\n" + ".visible .func _gfortran_abort\n" + "{\n" + "trap;\n" + "ret;\n" + "}\n"); diff --git a/libgomp/config/nvptx/oacc-init.c b/libgomp/config/nvptx/oacc-init.c index e69de29bb2d..c57a3f3104d 100644 --- a/libgomp/config/nvptx/oacc-init.c +++ b/libgomp/config/nvptx/oacc-init.c @@ -0,0 +1,42 @@ +/* OpenACC Runtime initialization routines + + Copyright (C) 2014-2015 Free Software Foundation, Inc. + + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. 
+ + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. + + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "openacc.h" + +/* For -O and higher, the compiler always attempts to expand acc_on_device, but + if the user disables the builtin, or calls it via a pointer, we'll need this + version. + + Compile this with optimization, so that the compiler expands + this, rather than generating infinitely recursive code. */ + +int __attribute__ ((__optimize__ ("O2"))) +acc_on_device (acc_device_t dev) +{ + return __builtin_acc_on_device (dev); +} diff --git a/libgomp/config/nvptx/oacc-parallel.c b/libgomp/config/nvptx/oacc-parallel.c index e69de29bb2d..b9712562e61 100644 --- a/libgomp/config/nvptx/oacc-parallel.c +++ b/libgomp/config/nvptx/oacc-parallel.c @@ -0,0 +1,358 @@ +/* OpenACC constructs + + Copyright (C) 2014-2015 Free Software Foundation, Inc. + + Contributed by Mentor Embedded. + + This file is part of the GNU Offloading and Multi Processing Library + (libgomp). + + Libgomp is free software; you can redistribute it and/or modify it + under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 3, or (at your option) + any later version. + + Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY + WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS + FOR A PARTICULAR PURPOSE. See the GNU General Public License for + more details. + + Under Section 7 of GPL version 3, you are granted additional + permissions described in the GCC Runtime Library Exception, version + 3.1, as published by the Free Software Foundation. 
+ + You should have received a copy of the GNU General Public License and + a copy of the GCC Runtime Library Exception along with this program; + see the files COPYING3 and COPYING.RUNTIME respectively. If not, see + <http://www.gnu.org/licenses/>. */ + +#include "libgomp_g.h" + +__asm__ (".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1);\n" + ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1);\n" + ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1);\n" + ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1);\n" + "// BEGIN GLOBAL FUNCTION DECL: GOACC_get_num_threads\n" + ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads;\n" + "// BEGIN GLOBAL FUNCTION DECL: GOACC_get_thread_num\n" + ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num;\n" + "// BEGIN GLOBAL FUNCTION DECL: abort\n" + ".extern .func abort;\n" + ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1)\n" + "{\n" + ".reg .u32 %ar1;\n" + ".reg .u32 %retval;\n" + ".reg .u64 %hr10;\n" + ".reg .u32 %r22;\n" + ".reg .u32 %r23;\n" + ".reg .u32 %r24;\n" + ".reg .u32 %r25;\n" + ".reg .u32 %r26;\n" + ".reg .u32 %r27;\n" + ".reg .u32 %r28;\n" + ".reg .u32 %r29;\n" + ".reg .pred %r30;\n" + ".reg .u32 %r31;\n" + ".reg .pred %r32;\n" + ".reg .u32 %r33;\n" + ".reg .pred %r34;\n" + ".local .align 8 .b8 %frame[4];\n" + "ld.param.u32 %ar1,[%in_ar1];\n" + "mov.u32 %r27,%ar1;\n" + "st.local.u32 [%frame],%r27;\n" + "ld.local.u32 %r28,[%frame];\n" + "mov.u32 %r29,1;\n" + "setp.eq.u32 %r30,%r28,%r29;\n" + "@%r30 bra $L4;\n" + "mov.u32 %r31,2;\n" + "setp.eq.u32 %r32,%r28,%r31;\n" + "@%r32 bra $L5;\n" + "mov.u32 %r33,0;\n" + "setp.eq.u32 %r34,%r28,%r33;\n" + "@!%r34 bra $L8;\n" + "mov.u32 %r23,%tid.x;\n" + "mov.u32 %r22,%r23;\n" + "bra $L7;\n" + "$L4:\n" + "mov.u32 %r24,%tid.y;\n" + "mov.u32 %r22,%r24;\n" + "bra $L7;\n" + "$L5:\n" + "mov.u32 %r25,%tid.z;\n" + "mov.u32 %r22,%r25;\n" + "bra $L7;\n" + "$L8:\n" + "{\n" + 
"{\n" + "call abort;\n" + "}\n" + "}\n" + "$L7:\n" + "mov.u32 %r26,%r22;\n" + "mov.u32 %retval,%r26;\n" + "st.param.u32 [%out_retval],%retval;\n" + "ret;\n" + "}\n" + ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1)\n" + "{\n" + ".reg .u32 %ar1;\n" + ".reg .u32 %retval;\n" + ".reg .u64 %hr10;\n" + ".reg .u32 %r22;\n" + ".reg .u32 %r23;\n" + ".reg .u32 %r24;\n" + ".reg .u32 %r25;\n" + ".reg .u32 %r26;\n" + ".reg .u32 %r27;\n" + ".reg .u32 %r28;\n" + ".reg .u32 %r29;\n" + ".reg .pred %r30;\n" + ".reg .u32 %r31;\n" + ".reg .pred %r32;\n" + ".reg .u32 %r33;\n" + ".reg .pred %r34;\n" + ".local .align 8 .b8 %frame[4];\n" + "ld.param.u32 %ar1,[%in_ar1];\n" + "mov.u32 %r27,%ar1;\n" + "st.local.u32 [%frame],%r27;\n" + "ld.local.u32 %r28,[%frame];\n" + "mov.u32 %r29,1;\n" + "setp.eq.u32 %r30,%r28,%r29;\n" + "@%r30 bra $L11;\n" + "mov.u32 %r31,2;\n" + "setp.eq.u32 %r32,%r28,%r31;\n" + "@%r32 bra $L12;\n" + "mov.u32 %r33,0;\n" + "setp.eq.u32 %r34,%r28,%r33;\n" + "@!%r34 bra $L15;\n" + "mov.u32 %r23,%ntid.x;\n" + "mov.u32 %r22,%r23;\n" + "bra $L14;\n" + "$L11:\n" + "mov.u32 %r24,%ntid.y;\n" + "mov.u32 %r22,%r24;\n" + "bra $L14;\n" + "$L12:\n" + "mov.u32 %r25,%ntid.z;\n" + "mov.u32 %r22,%r25;\n" + "bra $L14;\n" + "$L15:\n" + "{\n" + "{\n" + "call abort;\n" + "}\n" + "}\n" + "$L14:\n" + "mov.u32 %r26,%r22;\n" + "mov.u32 %retval,%r26;\n" + "st.param.u32 [%out_retval],%retval;\n" + "ret;\n" + "}\n" + ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1)\n" + "{\n" + ".reg .u32 %ar1;\n" + ".reg .u32 %retval;\n" + ".reg .u64 %hr10;\n" + ".reg .u32 %r22;\n" + ".reg .u32 %r23;\n" + ".reg .u32 %r24;\n" + ".reg .u32 %r25;\n" + ".reg .u32 %r26;\n" + ".reg .u32 %r27;\n" + ".reg .u32 %r28;\n" + ".reg .u32 %r29;\n" + ".reg .pred %r30;\n" + ".reg .u32 %r31;\n" + ".reg .pred %r32;\n" + ".reg .u32 %r33;\n" + ".reg .pred %r34;\n" + ".local .align 8 .b8 %frame[4];\n" + "ld.param.u32 %ar1,[%in_ar1];\n" + "mov.u32 %r27,%ar1;\n" + "st.local.u32 
[%frame],%r27;\n" + "ld.local.u32 %r28,[%frame];\n" + "mov.u32 %r29,1;\n" + "setp.eq.u32 %r30,%r28,%r29;\n" + "@%r30 bra $L18;\n" + "mov.u32 %r31,2;\n" + "setp.eq.u32 %r32,%r28,%r31;\n" + "@%r32 bra $L19;\n" + "mov.u32 %r33,0;\n" + "setp.eq.u32 %r34,%r28,%r33;\n" + "@!%r34 bra $L22;\n" + "mov.u32 %r23,%ctaid.x;\n" + "mov.u32 %r22,%r23;\n" + "bra $L21;\n" + "$L18:\n" + "mov.u32 %r24,%ctaid.y;\n" + "mov.u32 %r22,%r24;\n" + "bra $L21;\n" + "$L19:\n" + "mov.u32 %r25,%ctaid.z;\n" + "mov.u32 %r22,%r25;\n" + "bra $L21;\n" + "$L22:\n" + "{\n" + "{\n" + "call abort;\n" + "}\n" + "}\n" + "$L21:\n" + "mov.u32 %r26,%r22;\n" + "mov.u32 %retval,%r26;\n" + "st.param.u32 [%out_retval],%retval;\n" + "ret;\n" + "}\n" + ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1)\n" + "{\n" + ".reg .u32 %ar1;\n" + ".reg .u32 %retval;\n" + ".reg .u64 %hr10;\n" + ".reg .u32 %r22;\n" + ".reg .u32 %r23;\n" + ".reg .u32 %r24;\n" + ".reg .u32 %r25;\n" + ".reg .u32 %r26;\n" + ".reg .u32 %r27;\n" + ".reg .u32 %r28;\n" + ".reg .u32 %r29;\n" + ".reg .pred %r30;\n" + ".reg .u32 %r31;\n" + ".reg .pred %r32;\n" + ".reg .u32 %r33;\n" + ".reg .pred %r34;\n" + ".local .align 8 .b8 %frame[4];\n" + "ld.param.u32 %ar1,[%in_ar1];\n" + "mov.u32 %r27,%ar1;\n" + "st.local.u32 [%frame],%r27;\n" + "ld.local.u32 %r28,[%frame];\n" + "mov.u32 %r29,1;\n" + "setp.eq.u32 %r30,%r28,%r29;\n" + "@%r30 bra $L25;\n" + "mov.u32 %r31,2;\n" + "setp.eq.u32 %r32,%r28,%r31;\n" + "@%r32 bra $L26;\n" + "mov.u32 %r33,0;\n" + "setp.eq.u32 %r34,%r28,%r33;\n" + "@!%r34 bra $L29;\n" + "mov.u32 %r23,%nctaid.x;\n" + "mov.u32 %r22,%r23;\n" + "bra $L28;\n" + "$L25:\n" + "mov.u32 %r24,%nctaid.y;\n" + "mov.u32 %r22,%r24;\n" + "bra $L28;\n" + "$L26:\n" + "mov.u32 %r25,%nctaid.z;\n" + "mov.u32 %r22,%r25;\n" + "bra $L28;\n" + "$L29:\n" + "{\n" + "{\n" + "call abort;\n" + "}\n" + "}\n" + "$L28:\n" + "mov.u32 %r26,%r22;\n" + "mov.u32 %retval,%r26;\n" + "st.param.u32 [%out_retval],%retval;\n" + "ret;\n" + "}\n" + "// BEGIN 
GLOBAL FUNCTION DEF: GOACC_get_num_threads\n" + ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads\n" + "{\n" + ".reg .u32 %retval;\n" + ".reg .u64 %hr10;\n" + ".reg .u32 %r22;\n" + ".reg .u32 %r23;\n" + ".reg .u32 %r24;\n" + ".reg .u32 %r25;\n" + ".reg .u32 %r26;\n" + ".reg .u32 %r27;\n" + ".reg .u32 %r28;\n" + ".reg .u32 %r29;\n" + "mov.u32 %r26,0;\n" + "{\n" + ".param .u32 %retval_in;\n" + "{\n" + ".param .u32 %out_arg0;\n" + "st.param.u32 [%out_arg0],%r26;\n" + "call (%retval_in),GOACC_ntid,(%out_arg0);\n" + "}\n" + "ld.param.u32 %r27,[%retval_in];\n" + "}\n" + "mov.u32 %r22,%r27;\n" + "mov.u32 %r28,0;\n" + "{\n" + ".param .u32 %retval_in;\n" + "{\n" + ".param .u32 %out_arg0;\n" + "st.param.u32 [%out_arg0],%r28;\n" + "call (%retval_in),GOACC_nctaid,(%out_arg0);\n" + "}\n" + "ld.param.u32 %r29,[%retval_in];\n" + "}\n" + "mov.u32 %r23,%r29;\n" + "mul.lo.u32 %r24,%r22,%r23;\n" + "mov.u32 %r25,%r24;\n" + "mov.u32 %retval,%r25;\n" + "st.param.u32 [%out_retval],%retval;\n" + "ret;\n" + "}\n" + "// BEGIN GLOBAL FUNCTION DEF: GOACC_get_thread_num\n" + ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num\n" + "{\n" + ".reg .u32 %retval;\n" + ".reg .u64 %hr10;\n" + ".reg .u32 %r22;\n" + ".reg .u32 %r23;\n" + ".reg .u32 %r24;\n" + ".reg .u32 %r25;\n" + ".reg .u32 %r26;\n" + ".reg .u32 %r27;\n" + ".reg .u32 %r28;\n" + ".reg .u32 %r29;\n" + ".reg .u32 %r30;\n" + ".reg .u32 %r31;\n" + ".reg .u32 %r32;\n" + ".reg .u32 %r33;\n" + "mov.u32 %r28,0;\n" + "{\n" + ".param .u32 %retval_in;\n" + "{\n" + ".param .u32 %out_arg0;\n" + "st.param.u32 [%out_arg0],%r28;\n" + "call (%retval_in),GOACC_ntid,(%out_arg0);\n" + "}\n" + "ld.param.u32 %r29,[%retval_in];\n" + "}\n" + "mov.u32 %r22,%r29;\n" + "mov.u32 %r30,0;\n" + "{\n" + ".param .u32 %retval_in;\n" + "{\n" + ".param .u32 %out_arg0;\n" + "st.param.u32 [%out_arg0],%r30;\n" + "call (%retval_in),GOACC_ctaid,(%out_arg0);\n" + "}\n" + "ld.param.u32 %r31,[%retval_in];\n" + "}\n" + "mov.u32 %r23,%r31;\n" + "mul.lo.u32 
%r24,%r22,%r23;\n" + "mov.u32 %r32,0;\n" + "{\n" + ".param .u32 %retval_in;\n" + "{\n" + ".param .u32 %out_arg0;\n" + "st.param.u32 [%out_arg0],%r32;\n" + "call (%retval_in),GOACC_tid,(%out_arg0);\n" + "}\n" + "ld.param.u32 %r33,[%retval_in];\n" + "}\n" + "mov.u32 %r25,%r33;\n" + "add.u32 %r26,%r24,%r25;\n" + "mov.u32 %r27,%r26;\n" + "mov.u32 %retval,%r27;\n" + "st.param.u32 [%out_retval],%retval;\n" + "ret;\n" + "}\n"); diff --git a/libgomp/config/nvptx/openacc.f90 b/libgomp/config/nvptx/openacc.f90 new file mode 100644 index 00000000000..d8b5c0610b2 --- /dev/null +++ b/libgomp/config/nvptx/openacc.f90 @@ -0,0 +1,102 @@ +! OpenACC Runtime Library Definitions. + +! Copyright (C) 2014-2015 Free Software Foundation, Inc. + +! Contributed by Tobias Burnus +! and Mentor Embedded. + +! This file is part of the GNU Offloading and Multi Processing Library +! (libgomp). + +! Libgomp is free software; you can redistribute it and/or modify it +! under the terms of the GNU General Public License as published by +! the Free Software Foundation; either version 3, or (at your option) +! any later version. + +! Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY +! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS +! FOR A PARTICULAR PURPOSE. See the GNU General Public License for +! more details. + +! Under Section 7 of GPL version 3, you are granted additional +! permissions described in the GCC Runtime Library Exception, version +! 3.1, as published by the Free Software Foundation. + +! You should have received a copy of the GNU General Public License and +! a copy of the GCC Runtime Library Exception along with this program; +! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see +! <http://www.gnu.org/licenses/>. + +! Wrapper functions will be built from openacc.f90. We use a separate file +! here, because for using ../../openacc.f90, implementations are required for +! 
all the functions that it wraps, which we currently don't provide, so linking +! would fail. + +module openacc_kinds + use iso_fortran_env, only: int32 + implicit none + + private :: int32 + public :: acc_device_kind + + integer, parameter :: acc_device_kind = int32 + + public :: acc_device_none, acc_device_default, acc_device_host + public :: acc_device_not_host, acc_device_nvidia + + ! Keep in sync with include/gomp-constants.h. + integer (acc_device_kind), parameter :: acc_device_none = 0 + integer (acc_device_kind), parameter :: acc_device_default = 1 + integer (acc_device_kind), parameter :: acc_device_host = 2 + ! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed. + integer (acc_device_kind), parameter :: acc_device_not_host = 4 + integer (acc_device_kind), parameter :: acc_device_nvidia = 5 + +end module + +module openacc_internal + use openacc_kinds + implicit none + + interface + function acc_on_device_h (d) + import + integer (acc_device_kind) d + logical acc_on_device_h + end function + end interface + + interface + function acc_on_device_l (d) & + bind (C, name = "acc_on_device") + use iso_c_binding, only: c_int + integer (c_int) :: acc_on_device_l + integer (c_int), value :: d + end function + end interface +end module + +module openacc + use openacc_kinds + use openacc_internal + implicit none + + public :: acc_on_device + + interface acc_on_device + procedure :: acc_on_device_h + end interface + +end module openacc + +function acc_on_device_h (d) + use openacc_internal, only: acc_on_device_l + use openacc_kinds + integer (acc_device_kind) d + logical acc_on_device_h + if (acc_on_device_l (d) .eq. 1) then + acc_on_device_h = .TRUE. + else + acc_on_device_h = .FALSE. 
+ end if +end function diff --git a/libgomp/oacc-init.c b/libgomp/oacc-init.c index 28b9e7a90f1..a0e62a40cc4 100644 --- a/libgomp/oacc-init.c +++ b/libgomp/oacc-init.c @@ -620,7 +620,11 @@ acc_set_device_num (int ord, acc_device_t d) ialias (acc_set_device_num) -/* Compile on_device with optimization, so that the compiler expands +/* For -O and higher, the compiler always attempts to expand acc_on_device, but + if the user disables the builtin, or calls it via a pointer, we'll need this + version. + + Compile this with optimization, so that the compiler expands this, rather than generating infinitely recursive code. */ int __attribute__ ((__optimize__ ("O2"))) diff --git a/libgomp/oacc-ptx.h b/libgomp/oacc-ptx.h deleted file mode 100644 index 2419a460c72..00000000000 --- a/libgomp/oacc-ptx.h +++ /dev/null @@ -1,426 +0,0 @@ -/* Copyright (C) 2014-2015 Free Software Foundation, Inc. - - Contributed by Mentor Embedded. - - This file is part of the GNU Offloading and Multi Processing Library - (libgomp). - - Libgomp is free software; you can redistribute it and/or modify it - under the terms of the GNU General Public License as published by - the Free Software Foundation; either version 3, or (at your option) - any later version. - - Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY - WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS - FOR A PARTICULAR PURPOSE. See the GNU General Public License for - more details. - - Under Section 7 of GPL version 3, you are granted additional - permissions described in the GCC Runtime Library Exception, version - 3.1, as published by the Free Software Foundation. - - You should have received a copy of the GNU General Public License and - a copy of the GCC Runtime Library Exception along with this program; - see the files COPYING3 and COPYING.RUNTIME respectively. If not, see - <http://www.gnu.org/licenses/>. 
*/ - -#define ABORT_PTX \ - ".version 3.1\n" \ - ".target sm_30\n" \ - ".address_size 64\n" \ - ".visible .func abort;\n" \ - ".visible .func abort\n" \ - "{\n" \ - "trap;\n" \ - "ret;\n" \ - "}\n" \ - ".visible .func _gfortran_abort;\n" \ - ".visible .func _gfortran_abort\n" \ - "{\n" \ - "trap;\n" \ - "ret;\n" \ - "}\n" \ - -/* Generated with: - - $ echo 'int acc_on_device(int d) { return __builtin_acc_on_device(d); } int acc_on_device_h_(int *d) { return acc_on_device(*d); }' | accel-gcc/xgcc -Baccel-gcc -x c - -o - -S -m64 -O3 -fno-builtin-acc_on_device -fno-inline -*/ -#define ACC_ON_DEVICE_PTX \ - " .version 3.1\n" \ - " .target sm_30\n" \ - " .address_size 64\n" \ - ".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1);\n" \ - ".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1)\n" \ - "{\n" \ - " .reg.u32 %ar1;\n" \ - ".reg.u32 %retval;\n" \ - " .reg.u64 %hr10;\n" \ - " .reg.u32 %r24;\n" \ - " .reg.u32 %r25;\n" \ - " .reg.pred %r27;\n" \ - " .reg.u32 %r30;\n" \ - " ld.param.u32 %ar1, [%in_ar1];\n" \ - " mov.u32 %r24, %ar1;\n" \ - " setp.ne.u32 %r27,%r24,4;\n" \ - " set.u32.eq.u32 %r30,%r24,5;\n" \ - " neg.s32 %r25, %r30;\n" \ - " @%r27 bra $L3;\n" \ - " mov.u32 %r25, 1;\n" \ - "$L3:\n" \ - " mov.u32 %retval, %r25;\n" \ - " st.param.u32 [%out_retval], %retval;\n" \ - " ret;\n" \ - " }\n" \ - ".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1);\n" \ - ".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1)\n" \ - "{\n" \ - " .reg.u64 %ar1;\n" \ - ".reg.u32 %retval;\n" \ - " .reg.u64 %hr10;\n" \ - " .reg.u64 %r25;\n" \ - " .reg.u32 %r26;\n" \ - " .reg.u32 %r27;\n" \ - " ld.param.u64 %ar1, [%in_ar1];\n" \ - " mov.u64 %r25, %ar1;\n" \ - " ld.u32 %r26, [%r25];\n" \ - " {\n" \ - " .param.u32 %retval_in;\n" \ - " {\n" \ - " .param.u32 %out_arg0;\n" \ - " st.param.u32 [%out_arg0], %r26;\n" \ - " call (%retval_in), acc_on_device, (%out_arg0);\n" \ - " }\n" \ - " ld.param.u32 %r27, 
[%retval_in];\n" \ - "}\n" \ - " mov.u32 %retval, %r27;\n" \ - " st.param.u32 [%out_retval], %retval;\n" \ - " ret;\n" \ - " }" - - #define GOACC_INTERNAL_PTX \ - ".version 3.1\n" \ - ".target sm_30\n" \ - ".address_size 64\n" \ - ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1);\n" \ - ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1);\n" \ - ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1);\n" \ - ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1);\n" \ - ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads;\n" \ - ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num;\n" \ - ".extern .func abort;\n" \ - ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1)\n" \ - "{\n" \ - ".reg .u32 %ar1;\n" \ - ".reg .u32 %retval;\n" \ - ".reg .u64 %hr10;\n" \ - ".reg .u32 %r22;\n" \ - ".reg .u32 %r23;\n" \ - ".reg .u32 %r24;\n" \ - ".reg .u32 %r25;\n" \ - ".reg .u32 %r26;\n" \ - ".reg .u32 %r27;\n" \ - ".reg .u32 %r28;\n" \ - ".reg .u32 %r29;\n" \ - ".reg .pred %r30;\n" \ - ".reg .u32 %r31;\n" \ - ".reg .pred %r32;\n" \ - ".reg .u32 %r33;\n" \ - ".reg .pred %r34;\n" \ - ".local .align 8 .b8 %frame[4];\n" \ - "ld.param.u32 %ar1,[%in_ar1];\n" \ - "mov.u32 %r27,%ar1;\n" \ - "st.local.u32 [%frame],%r27;\n" \ - "ld.local.u32 %r28,[%frame];\n" \ - "mov.u32 %r29,1;\n" \ - "setp.eq.u32 %r30,%r28,%r29;\n" \ - "@%r30 bra $L4;\n" \ - "mov.u32 %r31,2;\n" \ - "setp.eq.u32 %r32,%r28,%r31;\n" \ - "@%r32 bra $L5;\n" \ - "mov.u32 %r33,0;\n" \ - "setp.eq.u32 %r34,%r28,%r33;\n" \ - "@!%r34 bra $L8;\n" \ - "mov.u32 %r23,%tid.x;\n" \ - "mov.u32 %r22,%r23;\n" \ - "bra $L7;\n" \ - "$L4:\n" \ - "mov.u32 %r24,%tid.y;\n" \ - "mov.u32 %r22,%r24;\n" \ - "bra $L7;\n" \ - "$L5:\n" \ - "mov.u32 %r25,%tid.z;\n" \ - "mov.u32 %r22,%r25;\n" \ - "bra $L7;\n" \ - "$L8:\n" \ - "{\n" \ - "{\n" \ - "call abort;\n" \ - "}\n" \ - "}\n" \ - "$L7:\n" \ - "mov.u32 %r26,%r22;\n" \ 
- "mov.u32 %retval,%r26;\n" \ - "st.param.u32 [%out_retval],%retval;\n" \ - "ret;\n" \ - "}\n" \ - ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1)\n" \ - "{\n" \ - ".reg .u32 %ar1;\n" \ - ".reg .u32 %retval;\n" \ - ".reg .u64 %hr10;\n" \ - ".reg .u32 %r22;\n" \ - ".reg .u32 %r23;\n" \ - ".reg .u32 %r24;\n" \ - ".reg .u32 %r25;\n" \ - ".reg .u32 %r26;\n" \ - ".reg .u32 %r27;\n" \ - ".reg .u32 %r28;\n" \ - ".reg .u32 %r29;\n" \ - ".reg .pred %r30;\n" \ - ".reg .u32 %r31;\n" \ - ".reg .pred %r32;\n" \ - ".reg .u32 %r33;\n" \ - ".reg .pred %r34;\n" \ - ".local .align 8 .b8 %frame[4];\n" \ - "ld.param.u32 %ar1,[%in_ar1];\n" \ - "mov.u32 %r27,%ar1;\n" \ - "st.local.u32 [%frame],%r27;\n" \ - "ld.local.u32 %r28,[%frame];\n" \ - "mov.u32 %r29,1;\n" \ - "setp.eq.u32 %r30,%r28,%r29;\n" \ - "@%r30 bra $L11;\n" \ - "mov.u32 %r31,2;\n" \ - "setp.eq.u32 %r32,%r28,%r31;\n" \ - "@%r32 bra $L12;\n" \ - "mov.u32 %r33,0;\n" \ - "setp.eq.u32 %r34,%r28,%r33;\n" \ - "@!%r34 bra $L15;\n" \ - "mov.u32 %r23,%ntid.x;\n" \ - "mov.u32 %r22,%r23;\n" \ - "bra $L14;\n" \ - "$L11:\n" \ - "mov.u32 %r24,%ntid.y;\n" \ - "mov.u32 %r22,%r24;\n" \ - "bra $L14;\n" \ - "$L12:\n" \ - "mov.u32 %r25,%ntid.z;\n" \ - "mov.u32 %r22,%r25;\n" \ - "bra $L14;\n" \ - "$L15:\n" \ - "{\n" \ - "{\n" \ - "call abort;\n" \ - "}\n" \ - "}\n" \ - "$L14:\n" \ - "mov.u32 %r26,%r22;\n" \ - "mov.u32 %retval,%r26;\n" \ - "st.param.u32 [%out_retval],%retval;\n" \ - "ret;\n" \ - "}\n" \ - ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1)\n" \ - "{\n" \ - ".reg .u32 %ar1;\n" \ - ".reg .u32 %retval;\n" \ - ".reg .u64 %hr10;\n" \ - ".reg .u32 %r22;\n" \ - ".reg .u32 %r23;\n" \ - ".reg .u32 %r24;\n" \ - ".reg .u32 %r25;\n" \ - ".reg .u32 %r26;\n" \ - ".reg .u32 %r27;\n" \ - ".reg .u32 %r28;\n" \ - ".reg .u32 %r29;\n" \ - ".reg .pred %r30;\n" \ - ".reg .u32 %r31;\n" \ - ".reg .pred %r32;\n" \ - ".reg .u32 %r33;\n" \ - ".reg .pred %r34;\n" \ - ".local .align 8 .b8 %frame[4];\n" \ - 
"ld.param.u32 %ar1,[%in_ar1];\n" \ - "mov.u32 %r27,%ar1;\n" \ - "st.local.u32 [%frame],%r27;\n" \ - "ld.local.u32 %r28,[%frame];\n" \ - "mov.u32 %r29,1;\n" \ - "setp.eq.u32 %r30,%r28,%r29;\n" \ - "@%r30 bra $L18;\n" \ - "mov.u32 %r31,2;\n" \ - "setp.eq.u32 %r32,%r28,%r31;\n" \ - "@%r32 bra $L19;\n" \ - "mov.u32 %r33,0;\n" \ - "setp.eq.u32 %r34,%r28,%r33;\n" \ - "@!%r34 bra $L22;\n" \ - "mov.u32 %r23,%ctaid.x;\n" \ - "mov.u32 %r22,%r23;\n" \ - "bra $L21;\n" \ - "$L18:\n" \ - "mov.u32 %r24,%ctaid.y;\n" \ - "mov.u32 %r22,%r24;\n" \ - "bra $L21;\n" \ - "$L19:\n" \ - "mov.u32 %r25,%ctaid.z;\n" \ - "mov.u32 %r22,%r25;\n" \ - "bra $L21;\n" \ - "$L22:\n" \ - "{\n" \ - "{\n" \ - "call abort;\n" \ - "}\n" \ - "}\n" \ - "$L21:\n" \ - "mov.u32 %r26,%r22;\n" \ - "mov.u32 %retval,%r26;\n" \ - "st.param.u32 [%out_retval],%retval;\n" \ - "ret;\n" \ - "}\n" \ - ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1)\n" \ - "{\n" \ - ".reg .u32 %ar1;\n" \ - ".reg .u32 %retval;\n" \ - ".reg .u64 %hr10;\n" \ - ".reg .u32 %r22;\n" \ - ".reg .u32 %r23;\n" \ - ".reg .u32 %r24;\n" \ - ".reg .u32 %r25;\n" \ - ".reg .u32 %r26;\n" \ - ".reg .u32 %r27;\n" \ - ".reg .u32 %r28;\n" \ - ".reg .u32 %r29;\n" \ - ".reg .pred %r30;\n" \ - ".reg .u32 %r31;\n" \ - ".reg .pred %r32;\n" \ - ".reg .u32 %r33;\n" \ - ".reg .pred %r34;\n" \ - ".local .align 8 .b8 %frame[4];\n" \ - "ld.param.u32 %ar1,[%in_ar1];\n" \ - "mov.u32 %r27,%ar1;\n" \ - "st.local.u32 [%frame],%r27;\n" \ - "ld.local.u32 %r28,[%frame];\n" \ - "mov.u32 %r29,1;\n" \ - "setp.eq.u32 %r30,%r28,%r29;\n" \ - "@%r30 bra $L25;\n" \ - "mov.u32 %r31,2;\n" \ - "setp.eq.u32 %r32,%r28,%r31;\n" \ - "@%r32 bra $L26;\n" \ - "mov.u32 %r33,0;\n" \ - "setp.eq.u32 %r34,%r28,%r33;\n" \ - "@!%r34 bra $L29;\n" \ - "mov.u32 %r23,%nctaid.x;\n" \ - "mov.u32 %r22,%r23;\n" \ - "bra $L28;\n" \ - "$L25:\n" \ - "mov.u32 %r24,%nctaid.y;\n" \ - "mov.u32 %r22,%r24;\n" \ - "bra $L28;\n" \ - "$L26:\n" \ - "mov.u32 %r25,%nctaid.z;\n" \ - "mov.u32 
%r22,%r25;\n" \ - "bra $L28;\n" \ - "$L29:\n" \ - "{\n" \ - "{\n" \ - "call abort;\n" \ - "}\n" \ - "}\n" \ - "$L28:\n" \ - "mov.u32 %r26,%r22;\n" \ - "mov.u32 %retval,%r26;\n" \ - "st.param.u32 [%out_retval],%retval;\n" \ - "ret;\n" \ - "}\n" \ - ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads\n" \ - "{\n" \ - ".reg .u32 %retval;\n" \ - ".reg .u64 %hr10;\n" \ - ".reg .u32 %r22;\n" \ - ".reg .u32 %r23;\n" \ - ".reg .u32 %r24;\n" \ - ".reg .u32 %r25;\n" \ - ".reg .u32 %r26;\n" \ - ".reg .u32 %r27;\n" \ - ".reg .u32 %r28;\n" \ - ".reg .u32 %r29;\n" \ - "mov.u32 %r26,0;\n" \ - "{\n" \ - ".param .u32 %retval_in;\n" \ - "{\n" \ - ".param .u32 %out_arg0;\n" \ - "st.param.u32 [%out_arg0],%r26;\n" \ - "call (%retval_in),GOACC_ntid,(%out_arg0);\n" \ - "}\n" \ - "ld.param.u32 %r27,[%retval_in];\n" \ - "}\n" \ - "mov.u32 %r22,%r27;\n" \ - "mov.u32 %r28,0;\n" \ - "{\n" \ - ".param .u32 %retval_in;\n" \ - "{\n" \ - ".param .u32 %out_arg0;\n" \ - "st.param.u32 [%out_arg0],%r28;\n" \ - "call (%retval_in),GOACC_nctaid,(%out_arg0);\n" \ - "}\n" \ - "ld.param.u32 %r29,[%retval_in];\n" \ - "}\n" \ - "mov.u32 %r23,%r29;\n" \ - "mul.lo.u32 %r24,%r22,%r23;\n" \ - "mov.u32 %r25,%r24;\n" \ - "mov.u32 %retval,%r25;\n" \ - "st.param.u32 [%out_retval],%retval;\n" \ - "ret;\n" \ - "}\n" \ - ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num\n" \ - "{\n" \ - ".reg .u32 %retval;\n" \ - ".reg .u64 %hr10;\n" \ - ".reg .u32 %r22;\n" \ - ".reg .u32 %r23;\n" \ - ".reg .u32 %r24;\n" \ - ".reg .u32 %r25;\n" \ - ".reg .u32 %r26;\n" \ - ".reg .u32 %r27;\n" \ - ".reg .u32 %r28;\n" \ - ".reg .u32 %r29;\n" \ - ".reg .u32 %r30;\n" \ - ".reg .u32 %r31;\n" \ - ".reg .u32 %r32;\n" \ - ".reg .u32 %r33;\n" \ - "mov.u32 %r28,0;\n" \ - "{\n" \ - ".param .u32 %retval_in;\n" \ - "{\n" \ - ".param .u32 %out_arg0;\n" \ - "st.param.u32 [%out_arg0],%r28;\n" \ - "call (%retval_in),GOACC_ntid,(%out_arg0);\n" \ - "}\n" \ - "ld.param.u32 %r29,[%retval_in];\n" \ - "}\n" \ - "mov.u32 %r22,%r29;\n" 
\ - "mov.u32 %r30,0;\n" \ - "{\n" \ - ".param .u32 %retval_in;\n" \ - "{\n" \ - ".param .u32 %out_arg0;\n" \ - "st.param.u32 [%out_arg0],%r30;\n" \ - "call (%retval_in),GOACC_ctaid,(%out_arg0);\n" \ - "}\n" \ - "ld.param.u32 %r31,[%retval_in];\n" \ - "}\n" \ - "mov.u32 %r23,%r31;\n" \ - "mul.lo.u32 %r24,%r22,%r23;\n" \ - "mov.u32 %r32,0;\n" \ - "{\n" \ - ".param .u32 %retval_in;\n" \ - "{\n" \ - ".param .u32 %out_arg0;\n" \ - "st.param.u32 [%out_arg0],%r32;\n" \ - "call (%retval_in),GOACC_tid,(%out_arg0);\n" \ - "}\n" \ - "ld.param.u32 %r33,[%retval_in];\n" \ - "}\n" \ - "mov.u32 %r25,%r33;\n" \ - "add.u32 %r26,%r24,%r25;\n" \ - "mov.u32 %r27,%r26;\n" \ - "mov.u32 %retval,%r27;\n" \ - "st.param.u32 [%out_retval],%retval;\n" \ - "ret;\n" \ - "}\n" diff --git a/libgomp/plugin/plugin-nvptx.c b/libgomp/plugin/plugin-nvptx.c index cedcc592bf0..9b846378a11 100644 --- a/libgomp/plugin/plugin-nvptx.c +++ b/libgomp/plugin/plugin-nvptx.c @@ -34,7 +34,6 @@ #include "openacc.h" #include "config.h" #include "libgomp-plugin.h" -#include "oacc-ptx.h" #include "oacc-plugin.h" #include "gomp-constants.h" @@ -750,35 +749,6 @@ link_ptx (CUmodule *module, const struct targ_ptx_obj *ptx_objs, if (r != CUDA_SUCCESS) GOMP_PLUGIN_fatal ("cuLinkCreate error: %s", cuda_error (r)); - char *abort_ptx = ABORT_PTX; - r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, abort_ptx, - strlen (abort_ptx) + 1, 0, 0, 0, 0); - if (r != CUDA_SUCCESS) - { - GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]); - GOMP_PLUGIN_fatal ("cuLinkAddData (abort) error: %s", cuda_error (r)); - } - - char *acc_on_device_ptx = ACC_ON_DEVICE_PTX; - r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, acc_on_device_ptx, - strlen (acc_on_device_ptx) + 1, 0, 0, 0, 0); - if (r != CUDA_SUCCESS) - { - GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]); - GOMP_PLUGIN_fatal ("cuLinkAddData (acc_on_device) error: %s", - cuda_error (r)); - } - - char *goacc_internal_ptx = GOACC_INTERNAL_PTX; - r = cuLinkAddData (linkstate, 
CU_JIT_INPUT_PTX, goacc_internal_ptx, - strlen (goacc_internal_ptx) + 1, 0, 0, 0, 0); - if (r != CUDA_SUCCESS) - { - GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]); - GOMP_PLUGIN_fatal ("cuLinkAddData (goacc_internal_ptx) error: %s", - cuda_error (r)); - } - for (; num_objs--; ptx_objs++) { /* cuLinkAddData's 'data' argument erroneously omits the const -- 2.30.2