+2015-10-02 Thomas Schwinge <thomas@codesourcery.com>
+ Bernd Schmidt <bernds@codesourcery.com>
+
+ * config/nvptx/mkoffload.c (Kind, Vis): Remove enums.
+ (Token, Stmt): Remove structs.
+ (decls, vars, fns): Remove variables.
+ (alloc_comment, append_stmt, is_keyword): Remove macros.
+ (tokenize, write_token, write_tokens, alloc_stmt, rev_stmts)
+ (write_stmt, write_stmts, parse_insn, parse_list_nosemi)
+ (parse_init, parse_file): Remove functions.
+ (read_file): Accept a pointer to a length and store into it.
+ (process): Don't try to parse the input file, just write it out as
+ a string, but looking for maps. Also write out the length.
+ (main): Don't use "-S" to compile PTX code.
+
2015-10-02 Jeff Law <law@redhat.com>
* tree-ssa-dom.c (optimize_stmt): Note when loop structures need
#define COMMENT_PREFIX "#"
-typedef enum Kind
-{
- /* 0-ff used for single char tokens */
- K_symbol = 0x100, /* a symbol */
- K_label, /* a label defn (i.e. symbol:) */
- K_ident, /* other ident */
- K_dotted, /* dotted identifier */
- K_number,
- K_string,
- K_comment
-} Kind;
-
-typedef struct Token
-{
- unsigned short kind : 12;
- unsigned short space : 1; /* preceded by space */
- unsigned short end : 1; /* succeeded by end of line */
- /* Length of token */
- unsigned short len;
-
- /* Token itself */
- char const *ptr;
-} Token;
-
-/* statement info */
-typedef enum Vis
-{
- V_dot = 0, /* random pseudo */
- V_var = 1, /* var decl/defn */
- V_func = 2, /* func decl/defn */
- V_insn = 3, /* random insn */
- V_label = 4, /* label defn */
- V_comment = 5,
- V_pred = 6, /* predicate */
- V_mask = 0x7,
- V_global = 0x08, /* globalize */
- V_weak = 0x10, /* weakly globalize */
- V_no_eol = 0x20, /* no end of line */
- V_prefix_comment = 0x40 /* prefixed comment */
-} Vis;
-
-typedef struct Stmt
-{
- struct Stmt *next;
- Token *tokens;
- unsigned char vis;
- unsigned len : 12;
- unsigned sym : 12;
-} Stmt;
-
struct id_map
{
id_map *next;
char *ptx_name;
};
-static const char *read_file (FILE *);
-static Token *tokenize (const char *);
-
-static void write_token (FILE *, const Token *);
-static void write_tokens (FILE *, const Token *, unsigned, int);
-
-static Stmt *alloc_stmt (unsigned, Token *, Token *, const Token *);
-#define alloc_comment(S,E) alloc_stmt (V_comment, S, E, 0)
-#define append_stmt(V, S) ((S)->next = *(V), *(V) = (S))
-static Stmt *rev_stmts (Stmt *);
-static void write_stmt (FILE *, const Stmt *);
-static void write_stmts (FILE *, const Stmt *);
-
-static Token *parse_insn (Token *);
-static Token *parse_list_nosemi (Token *);
-static Token *parse_init (Token *);
-static Token *parse_file (Token *);
-
-static Stmt *decls;
-static Stmt *vars;
-static Stmt *fns;
-
static id_map *func_ids, **funcs_tail = &func_ids;
static id_map *var_ids, **vars_tail = &var_ids;
remember, there could be a NUL in the file itself. */
static const char *
-read_file (FILE *stream)
+read_file (FILE *stream, size_t *plen)
{
size_t alloc = 16384;
size_t base = 0;
}
}
buffer[base] = 0;
+ *plen = base;
return buffer;
}
-/* Read a token, advancing ptr.
- If we read a comment, append it to the comments block. */
-
-static Token *
-tokenize (const char *ptr)
-{
- unsigned alloc = 1000;
- unsigned num = 0;
- Token *toks = XNEWVEC (Token, alloc);
- int in_comment = 0;
- int not_comment = 0;
-
- for (;; num++)
- {
- const char *base;
- unsigned kind;
- int ws = 0;
- int eol = 0;
-
- again:
- base = ptr;
- if (in_comment)
- goto block_comment;
- switch (kind = *ptr++)
- {
- default:
- break;
-
- case '\n':
- eol = 1;
- /* Fall through */
- case ' ':
- case '\t':
- case '\r':
- case '\v':
- /* White space */
- ws = not_comment;
- goto again;
-
- case '/':
- {
- if (*ptr == '/')
- {
- /* line comment. Do not include trailing \n */
- base += 2;
- for (; *ptr; ptr++)
- if (*ptr == '\n')
- break;
- kind = K_comment;
- }
- else if (*ptr == '*')
- {
- /* block comment */
- base += 2;
- ptr++;
-
- block_comment:
- eol = in_comment;
- in_comment = 1;
- for (; *ptr; ptr++)
- {
- if (*ptr == '\n')
- {
- ptr++;
- break;
- }
- if (ptr[0] == '*' && ptr[1] == '/')
- {
- in_comment = 2;
- ptr += 2;
- break;
- }
- }
- kind = K_comment;
- }
- else
- break;
- }
- break;
-
- case '"':
- /* quoted string */
- kind = K_string;
- while (*ptr)
- if (*ptr == '"')
- {
- ptr++;
- break;
- }
- else if (*ptr++ == '\\')
- ptr++;
- break;
-
- case '.':
- if (*ptr < '0' || *ptr > '9')
- {
- kind = K_dotted;
- ws = not_comment;
- goto ident;
- }
- /* FALLTHROUGH */
- case '0'...'9':
- kind = K_number;
- goto ident;
- break;
-
- case '$': /* local labels. */
- case '%': /* register names, pseudoes etc */
- kind = K_ident;
- goto ident;
-
- case 'a'...'z':
- case 'A'...'Z':
- case '_':
- kind = K_symbol; /* possible symbol name */
- ident:
- for (; *ptr; ptr++)
- {
- if (*ptr >= 'A' && *ptr <= 'Z')
- continue;
- if (*ptr >= 'a' && *ptr <= 'z')
- continue;
- if (*ptr >= '0' && *ptr <= '9')
- continue;
- if (*ptr == '_' || *ptr == '$')
- continue;
- if (*ptr == '.' && kind != K_dotted)
- /* Idents starting with a dot, cannot have internal dots. */
- continue;
- if ((*ptr == '+' || *ptr == '-')
- && kind == K_number
- && (ptr[-1] == 'e' || ptr[-1] == 'E'
- || ptr[-1] == 'p' || ptr[-1] == 'P'))
- /* exponent */
- continue;
- break;
- }
- if (*ptr == ':')
- {
- ptr++;
- kind = K_label;
- }
- break;
- }
-
- if (alloc == num)
- {
- alloc *= 2;
- toks = XRESIZEVEC (Token, toks, alloc);
- }
- Token *tok = toks + num;
-
- tok->kind = kind;
- tok->space = ws;
- tok->end = 0;
- tok->ptr = base;
- tok->len = ptr - base - in_comment;
- in_comment &= 1;
- not_comment = kind != K_comment;
- if (eol && num)
- tok[-1].end = 1;
- if (!kind)
- break;
- }
-
- return toks;
-}
-
-/* Write an encoded token. */
-
-static void
-write_token (FILE *out, Token const *tok)
-{
- if (tok->space)
- fputc (' ', out);
-
- switch (tok->kind)
- {
- case K_string:
- {
- const char *c = tok->ptr + 1;
- size_t len = tok->len - 2;
-
- fputs ("\\\"", out);
- while (len)
- {
- const char *bs = (const char *)memchr (c, '\\', len);
- size_t l = bs ? bs - c : len;
-
- fprintf (out, "%.*s", (int)l, c);
- len -= l;
- c += l;
- if (bs)
- {
- fputs ("\\\\", out);
- len--, c++;
- }
- }
- fputs ("\\\"", out);
- }
- break;
-
- default:
- /* All other tokens shouldn't have anything magic in them */
- fprintf (out, "%.*s", tok->len, tok->ptr);
- break;
- }
- if (tok->end)
- fputs ("\\n", out);
-}
-
-static void
-write_tokens (FILE *out, Token const *toks, unsigned len, int spc)
-{
- fputs ("\t\"", out);
- for (; len--; toks++)
- write_token (out, toks);
- if (spc)
- fputs (" ", out);
- fputs ("\"", out);
-}
-
-static Stmt *
-alloc_stmt (unsigned vis, Token *tokens, Token *end, Token const *sym)
-{
- static unsigned alloc = 0;
- static Stmt *heap = 0;
-
- if (!alloc)
- {
- alloc = 1000;
- heap = XNEWVEC (Stmt, alloc);
- }
-
- Stmt *stmt = heap++;
- alloc--;
-
- tokens->space = 0;
- stmt->next = 0;
- stmt->vis = vis;
- stmt->tokens = tokens;
- stmt->len = end - tokens;
- stmt->sym = sym ? sym - tokens : ~0;
-
- return stmt;
-}
-
-static Stmt *
-rev_stmts (Stmt *stmt)
-{
- Stmt *prev = 0;
- Stmt *next;
-
- while (stmt)
- {
- next = stmt->next;
- stmt->next = prev;
- prev = stmt;
- stmt = next;
- }
-
- return prev;
-}
-
-static void
-write_stmt (FILE *out, const Stmt *stmt)
-{
- if ((stmt->vis & V_mask) != V_comment)
- {
- write_tokens (out, stmt->tokens, stmt->len,
- (stmt->vis & V_mask) == V_pred);
- fputs (stmt->vis & V_no_eol ? "\t" : "\n", out);
- }
-}
-
-static void
-write_stmts (FILE *out, const Stmt *stmts)
-{
- for (; stmts; stmts = stmts->next)
- write_stmt (out, stmts);
-}
-
-static Token *
-parse_insn (Token *tok)
-{
- unsigned depth = 0;
-
- do
- {
- Stmt *stmt;
- Token *sym = 0;
- unsigned s = V_insn;
- Token *start = tok;
-
- switch (tok++->kind)
- {
- case K_comment:
- while (tok->kind == K_comment)
- tok++;
- stmt = alloc_comment (start, tok);
- append_stmt (&fns, stmt);
- continue;
-
- case '{':
- depth++;
- break;
-
- case '}':
- depth--;
- break;
-
- case K_label:
- if (tok[-1].ptr[0] != '$')
- sym = tok - 1;
- tok[-1].end = 1;
- s = V_label;
- break;
-
- case '@':
- tok->space = 0;
- if (tok->kind == '!')
- tok++;
- if (tok->kind == K_symbol)
- sym = tok;
- tok++;
- s = V_pred;
- break;
-
- default:
- for (; tok->kind != ';'; tok++)
- {
- if (tok->kind == ',')
- tok[1].space = 0;
- else if (tok->kind == K_symbol)
- sym = tok;
- }
- tok++->end = 1;
- break;
- }
-
- stmt = alloc_stmt (s, start, tok, sym);
- append_stmt (&fns, stmt);
-
- if (!tok[-1].end && tok[0].kind == K_comment)
- {
- stmt->vis |= V_no_eol;
- stmt = alloc_comment (tok, tok + 1);
- append_stmt (&fns, stmt);
- tok++;
- }
- }
- while (depth);
-
- return tok;
-}
-
-/* comma separated list of tokens */
-
-static Token *
-parse_list_nosemi (Token *tok)
-{
- Token *start = tok;
-
- do
- if (!(++tok)->kind)
- break;
- while ((++tok)->kind == ',');
-
- tok[-1].end = 1;
- Stmt *stmt = alloc_stmt (V_dot, start, tok, 0);
- append_stmt (&decls, stmt);
-
- return tok;
-}
-
-#define is_keyword(T,S) \
- (sizeof (S) == (T)->len && !memcmp ((T)->ptr + 1, (S), (T)->len - 1))
-
-static Token *
-parse_init (Token *tok)
-{
- for (;;)
- {
- Token *start = tok;
- Token const *sym = 0;
- Stmt *stmt;
-
- if (tok->kind == K_comment)
- {
- while (tok->kind == K_comment)
- tok++;
- stmt = alloc_comment (start, tok);
- append_stmt (&vars, stmt);
- start = tok;
- }
-
- if (tok->kind == '{')
- tok[1].space = 0;
- for (; tok->kind != ',' && tok->kind != ';'; tok++)
- if (tok->kind == K_symbol)
- sym = tok;
- tok[1].space = 0;
- int end = tok++->kind == ';';
- stmt = alloc_stmt (V_insn, start, tok, sym);
- append_stmt (&vars, stmt);
- if (!tok[-1].end && tok->kind == K_comment)
- {
- stmt->vis |= V_no_eol;
- stmt = alloc_comment (tok, tok + 1);
- append_stmt (&vars, stmt);
- tok++;
- }
- if (end)
- break;
- }
- return tok;
-}
-
-static Token *
-parse_file (Token *tok)
-{
- Stmt *comment = 0;
-
- if (tok->kind == K_comment)
- {
- Token *start = tok;
-
- while (tok->kind == K_comment)
- {
- if (strncmp (tok->ptr, ":VAR_MAP ", 9) == 0)
- record_id (tok->ptr + 9, &vars_tail);
- if (strncmp (tok->ptr, ":FUNC_MAP ", 10) == 0)
- record_id (tok->ptr + 10, &funcs_tail);
- tok++;
- }
- comment = alloc_comment (start, tok);
- comment->vis |= V_prefix_comment;
- }
-
- if (tok->kind == K_dotted)
- {
- if (is_keyword (tok, "version")
- || is_keyword (tok, "target")
- || is_keyword (tok, "address_size"))
- {
- if (comment)
- append_stmt (&decls, comment);
- tok = parse_list_nosemi (tok);
- }
- else
- {
- unsigned vis = 0;
- const Token *def = 0;
- unsigned is_decl = 0;
- Token *start;
-
- for (start = tok;
- tok->kind && tok->kind != '=' && tok->kind != K_comment
- && tok->kind != '{' && tok->kind != ';'; tok++)
- {
- if (is_keyword (tok, "global")
- || is_keyword (tok, "const"))
- vis |= V_var;
- else if (is_keyword (tok, "func")
- || is_keyword (tok, "entry"))
- vis |= V_func;
- else if (is_keyword (tok, "visible"))
- vis |= V_global;
- else if (is_keyword (tok, "extern"))
- is_decl = 1;
- else if (is_keyword (tok, "weak"))
- vis |= V_weak;
- if (tok->kind == '(')
- {
- tok[1].space = 0;
- tok[0].space = 1;
- }
- else if (tok->kind == ')' && tok[1].kind != ';')
- tok[1].space = 1;
-
- if (tok->kind == K_symbol)
- def = tok;
- }
-
- if (!tok->kind)
- {
- /* end of file */
- if (comment)
- append_stmt (&fns, comment);
- }
- else if (tok->kind == '{'
- || tok->kind == K_comment)
- {
- /* function defn */
- Stmt *stmt = alloc_stmt (vis, start, tok, def);
- if (comment)
- {
- append_stmt (&fns, comment);
- stmt->vis |= V_prefix_comment;
- }
- append_stmt (&fns, stmt);
- tok = parse_insn (tok);
- }
- else
- {
- int assign = tok->kind == '=';
-
- tok++->end = 1;
- if ((vis & V_mask) == V_var && !is_decl)
- {
- /* variable */
- Stmt *stmt = alloc_stmt (vis, start, tok, def);
- if (comment)
- {
- append_stmt (&vars, comment);
- stmt->vis |= V_prefix_comment;
- }
- append_stmt (&vars, stmt);
- if (assign)
- tok = parse_init (tok);
- }
- else
- {
- /* declaration */
- Stmt *stmt = alloc_stmt (vis, start, tok, 0);
- if (comment)
- {
- append_stmt (&decls, comment);
- stmt->vis |= V_prefix_comment;
- }
- append_stmt (&decls, stmt);
- }
- }
- }
- }
- else
- {
- /* Something strange. Ignore it. */
- if (comment)
- append_stmt (&fns, comment);
-
- do
- tok++;
- while (tok->kind && !tok->end);
- }
- return tok;
-}
-
/* Parse STR, saving found tokens into PVALUES and return their number.
Tokens are assumed to be delimited by ':'. */
static unsigned
static void
process (FILE *in, FILE *out)
{
- const char *input = read_file (in);
- Token *tok = tokenize (input);
+ size_t len = 0;
+ const char *input = read_file (in, &len);
const char *comma;
id_map const *id;
unsigned obj_count = 0;
unsigned ix;
- do
- tok = parse_file (tok);
- while (tok->kind);
+ /* Dump out one char array for each PTX object file in the input. In
+ the input, each of these is terminated by a NUL. */
+ for (size_t i = 0; i != len;)
+ {
+ char c;
- fprintf (out, "static const char ptx_code_%u[] = \n", obj_count++);
- write_stmts (out, rev_stmts (decls));
- write_stmts (out, rev_stmts (vars));
- write_stmts (out, rev_stmts (fns));
- fprintf (out, ";\n\n");
+ fprintf (out, "static const char ptx_code_%u[] =\n\t\"", obj_count++);
+ while ((c = input[i++]))
+ {
+ switch (c)
+ {
+ case '\r':
+ continue;
+ case '\n':
+ fprintf (out, "\\n\"\n\t\"");
+ /* Look for mappings on subsequent lines. */
+ while (strncmp (input + i, "//:", 3) == 0)
+ {
+ i += 3;
+
+ if (strncmp (input + i, "VAR_MAP ", 8) == 0)
+ record_id (input + i + 8, &vars_tail);
+ else if (strncmp (input + i, "FUNC_MAP ", 9) == 0)
+ record_id (input + i + 9, &funcs_tail);
+ else
+ abort ();
+ /* Skip to next line. */
+ while (input[i++] != '\n')
+ continue;
+ }
+ continue;
+ case '"':
+ case '\\':
+ putc ('\\', out);
+ break;
+ default:
+ break;
+ }
+ putc (c, out);
+ }
+ fprintf (out, "\";\n\n");
+ }
/* Dump out array of pointers to ptx object strings. */
fprintf (out, "static const struct ptx_obj {\n"
default:
gcc_unreachable ();
}
- obstack_ptr_grow (&argv_obstack, "-S");
for (int ix = 1; ix != argc; ix++)
{
+2015-10-02 Thomas Schwinge <thomas@codesourcery.com>
+
+ * oacc-ptx.h: Remove file, moving its content into...
+ * config/nvptx/fortran.c: ... here...
+ * config/nvptx/oacc-init.c: ..., here...
+ * config/nvptx/oacc-parallel.c: ..., and here.
+ * config/nvptx/openacc.f90: New file.
+ * plugin/plugin-nvptx.c: Don't include "oacc-ptx.h".
+ (link_ptx): Don't link in predefined bits of PTX code.
+
2015-09-30 Nathan Sidwell <nathan@codesourcery.com>
Bernd Schmidt <bernds@codesourcery.com>
+/* OpenACC Runtime Fortran wrapper routines
+
+ Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+/* Temporary hack; this will be provided by libgfortran. */
+
+extern void _gfortran_abort (void);
+
+__asm__ ("// BEGIN GLOBAL FUNCTION DECL: _gfortran_abort\n"
+ ".visible .func _gfortran_abort;\n"
+ "// BEGIN GLOBAL FUNCTION DEF: _gfortran_abort\n"
+ ".visible .func _gfortran_abort\n"
+ "{\n"
+ "trap;\n"
+ "ret;\n"
+ "}\n");
+/* OpenACC Runtime initialization routines
+
+ Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "openacc.h"
+
+/* Out-of-line definition of acc_on_device: returns the result of
+ __builtin_acc_on_device for DEV.
+
+ For -O and higher, the compiler always attempts to expand acc_on_device, but
+ if the user disables the builtin, or calls the function via a pointer, this
+ out-of-line version is needed.
+
+ The __optimize__ ("O2") attribute below makes this function be compiled
+ with optimization, so that the compiler expands the builtin call in the
+ body, rather than generating infinitely recursive code. */
+
+int __attribute__ ((__optimize__ ("O2")))
+acc_on_device (acc_device_t dev)
+{
+ return __builtin_acc_on_device (dev);
+}
+/* OpenACC constructs
+
+ Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+ Contributed by Mentor Embedded.
+
+ This file is part of the GNU Offloading and Multi Processing Library
+ (libgomp).
+
+ Libgomp is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published by
+ the Free Software Foundation; either version 3, or (at your option)
+ any later version.
+
+ Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+ WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+ FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+ more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#include "libgomp_g.h"
+
+__asm__ (".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1);\n"
+ ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1);\n"
+ ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1);\n"
+ ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1);\n"
+ "// BEGIN GLOBAL FUNCTION DECL: GOACC_get_num_threads\n"
+ ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads;\n"
+ "// BEGIN GLOBAL FUNCTION DECL: GOACC_get_thread_num\n"
+ ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num;\n"
+ "// BEGIN GLOBAL FUNCTION DECL: abort\n"
+ ".extern .func abort;\n"
+ ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1)\n"
+ "{\n"
+ ".reg .u32 %ar1;\n"
+ ".reg .u32 %retval;\n"
+ ".reg .u64 %hr10;\n"
+ ".reg .u32 %r22;\n"
+ ".reg .u32 %r23;\n"
+ ".reg .u32 %r24;\n"
+ ".reg .u32 %r25;\n"
+ ".reg .u32 %r26;\n"
+ ".reg .u32 %r27;\n"
+ ".reg .u32 %r28;\n"
+ ".reg .u32 %r29;\n"
+ ".reg .pred %r30;\n"
+ ".reg .u32 %r31;\n"
+ ".reg .pred %r32;\n"
+ ".reg .u32 %r33;\n"
+ ".reg .pred %r34;\n"
+ ".local .align 8 .b8 %frame[4];\n"
+ "ld.param.u32 %ar1,[%in_ar1];\n"
+ "mov.u32 %r27,%ar1;\n"
+ "st.local.u32 [%frame],%r27;\n"
+ "ld.local.u32 %r28,[%frame];\n"
+ "mov.u32 %r29,1;\n"
+ "setp.eq.u32 %r30,%r28,%r29;\n"
+ "@%r30 bra $L4;\n"
+ "mov.u32 %r31,2;\n"
+ "setp.eq.u32 %r32,%r28,%r31;\n"
+ "@%r32 bra $L5;\n"
+ "mov.u32 %r33,0;\n"
+ "setp.eq.u32 %r34,%r28,%r33;\n"
+ "@!%r34 bra $L8;\n"
+ "mov.u32 %r23,%tid.x;\n"
+ "mov.u32 %r22,%r23;\n"
+ "bra $L7;\n"
+ "$L4:\n"
+ "mov.u32 %r24,%tid.y;\n"
+ "mov.u32 %r22,%r24;\n"
+ "bra $L7;\n"
+ "$L5:\n"
+ "mov.u32 %r25,%tid.z;\n"
+ "mov.u32 %r22,%r25;\n"
+ "bra $L7;\n"
+ "$L8:\n"
+ "{\n"
+ "{\n"
+ "call abort;\n"
+ "}\n"
+ "}\n"
+ "$L7:\n"
+ "mov.u32 %r26,%r22;\n"
+ "mov.u32 %retval,%r26;\n"
+ "st.param.u32 [%out_retval],%retval;\n"
+ "ret;\n"
+ "}\n"
+ ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1)\n"
+ "{\n"
+ ".reg .u32 %ar1;\n"
+ ".reg .u32 %retval;\n"
+ ".reg .u64 %hr10;\n"
+ ".reg .u32 %r22;\n"
+ ".reg .u32 %r23;\n"
+ ".reg .u32 %r24;\n"
+ ".reg .u32 %r25;\n"
+ ".reg .u32 %r26;\n"
+ ".reg .u32 %r27;\n"
+ ".reg .u32 %r28;\n"
+ ".reg .u32 %r29;\n"
+ ".reg .pred %r30;\n"
+ ".reg .u32 %r31;\n"
+ ".reg .pred %r32;\n"
+ ".reg .u32 %r33;\n"
+ ".reg .pred %r34;\n"
+ ".local .align 8 .b8 %frame[4];\n"
+ "ld.param.u32 %ar1,[%in_ar1];\n"
+ "mov.u32 %r27,%ar1;\n"
+ "st.local.u32 [%frame],%r27;\n"
+ "ld.local.u32 %r28,[%frame];\n"
+ "mov.u32 %r29,1;\n"
+ "setp.eq.u32 %r30,%r28,%r29;\n"
+ "@%r30 bra $L11;\n"
+ "mov.u32 %r31,2;\n"
+ "setp.eq.u32 %r32,%r28,%r31;\n"
+ "@%r32 bra $L12;\n"
+ "mov.u32 %r33,0;\n"
+ "setp.eq.u32 %r34,%r28,%r33;\n"
+ "@!%r34 bra $L15;\n"
+ "mov.u32 %r23,%ntid.x;\n"
+ "mov.u32 %r22,%r23;\n"
+ "bra $L14;\n"
+ "$L11:\n"
+ "mov.u32 %r24,%ntid.y;\n"
+ "mov.u32 %r22,%r24;\n"
+ "bra $L14;\n"
+ "$L12:\n"
+ "mov.u32 %r25,%ntid.z;\n"
+ "mov.u32 %r22,%r25;\n"
+ "bra $L14;\n"
+ "$L15:\n"
+ "{\n"
+ "{\n"
+ "call abort;\n"
+ "}\n"
+ "}\n"
+ "$L14:\n"
+ "mov.u32 %r26,%r22;\n"
+ "mov.u32 %retval,%r26;\n"
+ "st.param.u32 [%out_retval],%retval;\n"
+ "ret;\n"
+ "}\n"
+ ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1)\n"
+ "{\n"
+ ".reg .u32 %ar1;\n"
+ ".reg .u32 %retval;\n"
+ ".reg .u64 %hr10;\n"
+ ".reg .u32 %r22;\n"
+ ".reg .u32 %r23;\n"
+ ".reg .u32 %r24;\n"
+ ".reg .u32 %r25;\n"
+ ".reg .u32 %r26;\n"
+ ".reg .u32 %r27;\n"
+ ".reg .u32 %r28;\n"
+ ".reg .u32 %r29;\n"
+ ".reg .pred %r30;\n"
+ ".reg .u32 %r31;\n"
+ ".reg .pred %r32;\n"
+ ".reg .u32 %r33;\n"
+ ".reg .pred %r34;\n"
+ ".local .align 8 .b8 %frame[4];\n"
+ "ld.param.u32 %ar1,[%in_ar1];\n"
+ "mov.u32 %r27,%ar1;\n"
+ "st.local.u32 [%frame],%r27;\n"
+ "ld.local.u32 %r28,[%frame];\n"
+ "mov.u32 %r29,1;\n"
+ "setp.eq.u32 %r30,%r28,%r29;\n"
+ "@%r30 bra $L18;\n"
+ "mov.u32 %r31,2;\n"
+ "setp.eq.u32 %r32,%r28,%r31;\n"
+ "@%r32 bra $L19;\n"
+ "mov.u32 %r33,0;\n"
+ "setp.eq.u32 %r34,%r28,%r33;\n"
+ "@!%r34 bra $L22;\n"
+ "mov.u32 %r23,%ctaid.x;\n"
+ "mov.u32 %r22,%r23;\n"
+ "bra $L21;\n"
+ "$L18:\n"
+ "mov.u32 %r24,%ctaid.y;\n"
+ "mov.u32 %r22,%r24;\n"
+ "bra $L21;\n"
+ "$L19:\n"
+ "mov.u32 %r25,%ctaid.z;\n"
+ "mov.u32 %r22,%r25;\n"
+ "bra $L21;\n"
+ "$L22:\n"
+ "{\n"
+ "{\n"
+ "call abort;\n"
+ "}\n"
+ "}\n"
+ "$L21:\n"
+ "mov.u32 %r26,%r22;\n"
+ "mov.u32 %retval,%r26;\n"
+ "st.param.u32 [%out_retval],%retval;\n"
+ "ret;\n"
+ "}\n"
+ ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1)\n"
+ "{\n"
+ ".reg .u32 %ar1;\n"
+ ".reg .u32 %retval;\n"
+ ".reg .u64 %hr10;\n"
+ ".reg .u32 %r22;\n"
+ ".reg .u32 %r23;\n"
+ ".reg .u32 %r24;\n"
+ ".reg .u32 %r25;\n"
+ ".reg .u32 %r26;\n"
+ ".reg .u32 %r27;\n"
+ ".reg .u32 %r28;\n"
+ ".reg .u32 %r29;\n"
+ ".reg .pred %r30;\n"
+ ".reg .u32 %r31;\n"
+ ".reg .pred %r32;\n"
+ ".reg .u32 %r33;\n"
+ ".reg .pred %r34;\n"
+ ".local .align 8 .b8 %frame[4];\n"
+ "ld.param.u32 %ar1,[%in_ar1];\n"
+ "mov.u32 %r27,%ar1;\n"
+ "st.local.u32 [%frame],%r27;\n"
+ "ld.local.u32 %r28,[%frame];\n"
+ "mov.u32 %r29,1;\n"
+ "setp.eq.u32 %r30,%r28,%r29;\n"
+ "@%r30 bra $L25;\n"
+ "mov.u32 %r31,2;\n"
+ "setp.eq.u32 %r32,%r28,%r31;\n"
+ "@%r32 bra $L26;\n"
+ "mov.u32 %r33,0;\n"
+ "setp.eq.u32 %r34,%r28,%r33;\n"
+ "@!%r34 bra $L29;\n"
+ "mov.u32 %r23,%nctaid.x;\n"
+ "mov.u32 %r22,%r23;\n"
+ "bra $L28;\n"
+ "$L25:\n"
+ "mov.u32 %r24,%nctaid.y;\n"
+ "mov.u32 %r22,%r24;\n"
+ "bra $L28;\n"
+ "$L26:\n"
+ "mov.u32 %r25,%nctaid.z;\n"
+ "mov.u32 %r22,%r25;\n"
+ "bra $L28;\n"
+ "$L29:\n"
+ "{\n"
+ "{\n"
+ "call abort;\n"
+ "}\n"
+ "}\n"
+ "$L28:\n"
+ "mov.u32 %r26,%r22;\n"
+ "mov.u32 %retval,%r26;\n"
+ "st.param.u32 [%out_retval],%retval;\n"
+ "ret;\n"
+ "}\n"
+ "// BEGIN GLOBAL FUNCTION DEF: GOACC_get_num_threads\n"
+ ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads\n"
+ "{\n"
+ ".reg .u32 %retval;\n"
+ ".reg .u64 %hr10;\n"
+ ".reg .u32 %r22;\n"
+ ".reg .u32 %r23;\n"
+ ".reg .u32 %r24;\n"
+ ".reg .u32 %r25;\n"
+ ".reg .u32 %r26;\n"
+ ".reg .u32 %r27;\n"
+ ".reg .u32 %r28;\n"
+ ".reg .u32 %r29;\n"
+ "mov.u32 %r26,0;\n"
+ "{\n"
+ ".param .u32 %retval_in;\n"
+ "{\n"
+ ".param .u32 %out_arg0;\n"
+ "st.param.u32 [%out_arg0],%r26;\n"
+ "call (%retval_in),GOACC_ntid,(%out_arg0);\n"
+ "}\n"
+ "ld.param.u32 %r27,[%retval_in];\n"
+ "}\n"
+ "mov.u32 %r22,%r27;\n"
+ "mov.u32 %r28,0;\n"
+ "{\n"
+ ".param .u32 %retval_in;\n"
+ "{\n"
+ ".param .u32 %out_arg0;\n"
+ "st.param.u32 [%out_arg0],%r28;\n"
+ "call (%retval_in),GOACC_nctaid,(%out_arg0);\n"
+ "}\n"
+ "ld.param.u32 %r29,[%retval_in];\n"
+ "}\n"
+ "mov.u32 %r23,%r29;\n"
+ "mul.lo.u32 %r24,%r22,%r23;\n"
+ "mov.u32 %r25,%r24;\n"
+ "mov.u32 %retval,%r25;\n"
+ "st.param.u32 [%out_retval],%retval;\n"
+ "ret;\n"
+ "}\n"
+ "// BEGIN GLOBAL FUNCTION DEF: GOACC_get_thread_num\n"
+ ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num\n"
+ "{\n"
+ ".reg .u32 %retval;\n"
+ ".reg .u64 %hr10;\n"
+ ".reg .u32 %r22;\n"
+ ".reg .u32 %r23;\n"
+ ".reg .u32 %r24;\n"
+ ".reg .u32 %r25;\n"
+ ".reg .u32 %r26;\n"
+ ".reg .u32 %r27;\n"
+ ".reg .u32 %r28;\n"
+ ".reg .u32 %r29;\n"
+ ".reg .u32 %r30;\n"
+ ".reg .u32 %r31;\n"
+ ".reg .u32 %r32;\n"
+ ".reg .u32 %r33;\n"
+ "mov.u32 %r28,0;\n"
+ "{\n"
+ ".param .u32 %retval_in;\n"
+ "{\n"
+ ".param .u32 %out_arg0;\n"
+ "st.param.u32 [%out_arg0],%r28;\n"
+ "call (%retval_in),GOACC_ntid,(%out_arg0);\n"
+ "}\n"
+ "ld.param.u32 %r29,[%retval_in];\n"
+ "}\n"
+ "mov.u32 %r22,%r29;\n"
+ "mov.u32 %r30,0;\n"
+ "{\n"
+ ".param .u32 %retval_in;\n"
+ "{\n"
+ ".param .u32 %out_arg0;\n"
+ "st.param.u32 [%out_arg0],%r30;\n"
+ "call (%retval_in),GOACC_ctaid,(%out_arg0);\n"
+ "}\n"
+ "ld.param.u32 %r31,[%retval_in];\n"
+ "}\n"
+ "mov.u32 %r23,%r31;\n"
+ "mul.lo.u32 %r24,%r22,%r23;\n"
+ "mov.u32 %r32,0;\n"
+ "{\n"
+ ".param .u32 %retval_in;\n"
+ "{\n"
+ ".param .u32 %out_arg0;\n"
+ "st.param.u32 [%out_arg0],%r32;\n"
+ "call (%retval_in),GOACC_tid,(%out_arg0);\n"
+ "}\n"
+ "ld.param.u32 %r33,[%retval_in];\n"
+ "}\n"
+ "mov.u32 %r25,%r33;\n"
+ "add.u32 %r26,%r24,%r25;\n"
+ "mov.u32 %r27,%r26;\n"
+ "mov.u32 %retval,%r27;\n"
+ "st.param.u32 [%out_retval],%retval;\n"
+ "ret;\n"
+ "}\n");
--- /dev/null
+! OpenACC Runtime Library Definitions.
+
+! Copyright (C) 2014-2015 Free Software Foundation, Inc.
+
+! Contributed by Tobias Burnus <burnus@net-b.de>
+! and Mentor Embedded.
+
+! This file is part of the GNU Offloading and Multi Processing Library
+! (libgomp).
+
+! Libgomp is free software; you can redistribute it and/or modify it
+! under the terms of the GNU General Public License as published by
+! the Free Software Foundation; either version 3, or (at your option)
+! any later version.
+
+! Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
+! WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
+! FOR A PARTICULAR PURPOSE. See the GNU General Public License for
+! more details.
+
+! Under Section 7 of GPL version 3, you are granted additional
+! permissions described in the GCC Runtime Library Exception, version
+! 3.1, as published by the Free Software Foundation.
+
+! You should have received a copy of the GNU General Public License and
+! a copy of the GCC Runtime Library Exception along with this program;
+! see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+! <http://www.gnu.org/licenses/>.
+
+! Wrapper functions will be built from openacc.f90. We use a separate file
+! here because using ../../openacc.f90 would require implementations of all
+! the functions that it wraps; we currently don't provide those, so linking
+! would fail.
+
+module openacc_kinds
+ use iso_fortran_env, only: int32
+ implicit none
+
+ private :: int32
+ public :: acc_device_kind
+
+ integer, parameter :: acc_device_kind = int32
+
+ public :: acc_device_none, acc_device_default, acc_device_host
+ public :: acc_device_not_host, acc_device_nvidia
+
+ ! Keep in sync with include/gomp-constants.h.
+ integer (acc_device_kind), parameter :: acc_device_none = 0
+ integer (acc_device_kind), parameter :: acc_device_default = 1
+ integer (acc_device_kind), parameter :: acc_device_host = 2
+ ! integer (acc_device_kind), parameter :: acc_device_host_nonshm = 3 removed.
+ integer (acc_device_kind), parameter :: acc_device_not_host = 4
+ integer (acc_device_kind), parameter :: acc_device_nvidia = 5
+
+end module
+
+module openacc_internal
+ use openacc_kinds
+ implicit none
+
+ interface
+ function acc_on_device_h (d)
+ import
+ integer (acc_device_kind) d
+ logical acc_on_device_h
+ end function
+ end interface
+
+ interface
+ function acc_on_device_l (d) &
+ bind (C, name = "acc_on_device")
+ use iso_c_binding, only: c_int
+ integer (c_int) :: acc_on_device_l
+ integer (c_int), value :: d
+ end function
+ end interface
+end module
+
+module openacc
+ use openacc_kinds
+ use openacc_internal
+ implicit none
+
+ public :: acc_on_device
+
+ interface acc_on_device
+ procedure :: acc_on_device_h
+ end interface
+
+end module openacc
+
+function acc_on_device_h (d) ! Logical wrapper: .TRUE. exactly when acc_on_device_l (d) returns 1.
+ use openacc_internal, only: acc_on_device_l ! Interface bound to the C symbol acc_on_device.
+ use openacc_kinds
+ integer (acc_device_kind) d ! Device kind value, passed through to acc_on_device_l.
+ logical acc_on_device_h ! Function result.
+ if (acc_on_device_l (d) .eq. 1) then ! Only the integer result 1 maps to .TRUE.; anything else yields .FALSE.
+ acc_on_device_h = .TRUE.
+ else
+ acc_on_device_h = .FALSE.
+ end if
+end function
ialias (acc_set_device_num)
-/* Compile on_device with optimization, so that the compiler expands
+/* For -O and higher, the compiler always attempts to expand acc_on_device, but
+ if the user disables the builtin, or calls it via a pointer, we'll need this
+ version.
+
+ Compile this with optimization, so that the compiler expands
this, rather than generating infinitely recursive code. */
int __attribute__ ((__optimize__ ("O2")))
+++ /dev/null
-/* Copyright (C) 2014-2015 Free Software Foundation, Inc.
-
- Contributed by Mentor Embedded.
-
- This file is part of the GNU Offloading and Multi Processing Library
- (libgomp).
-
- Libgomp is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published by
- the Free Software Foundation; either version 3, or (at your option)
- any later version.
-
- Libgomp is distributed in the hope that it will be useful, but WITHOUT ANY
- WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
- FOR A PARTICULAR PURPOSE. See the GNU General Public License for
- more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
-#define ABORT_PTX \
- ".version 3.1\n" \
- ".target sm_30\n" \
- ".address_size 64\n" \
- ".visible .func abort;\n" \
- ".visible .func abort\n" \
- "{\n" \
- "trap;\n" \
- "ret;\n" \
- "}\n" \
- ".visible .func _gfortran_abort;\n" \
- ".visible .func _gfortran_abort\n" \
- "{\n" \
- "trap;\n" \
- "ret;\n" \
- "}\n" \
-
-/* Generated with:
-
- $ echo 'int acc_on_device(int d) { return __builtin_acc_on_device(d); } int acc_on_device_h_(int *d) { return acc_on_device(*d); }' | accel-gcc/xgcc -Baccel-gcc -x c - -o - -S -m64 -O3 -fno-builtin-acc_on_device -fno-inline
-*/
-#define ACC_ON_DEVICE_PTX \
- " .version 3.1\n" \
- " .target sm_30\n" \
- " .address_size 64\n" \
- ".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1);\n" \
- ".visible .func (.param.u32 %out_retval)acc_on_device(.param.u32 %in_ar1)\n" \
- "{\n" \
- " .reg.u32 %ar1;\n" \
- ".reg.u32 %retval;\n" \
- " .reg.u64 %hr10;\n" \
- " .reg.u32 %r24;\n" \
- " .reg.u32 %r25;\n" \
- " .reg.pred %r27;\n" \
- " .reg.u32 %r30;\n" \
- " ld.param.u32 %ar1, [%in_ar1];\n" \
- " mov.u32 %r24, %ar1;\n" \
- " setp.ne.u32 %r27,%r24,4;\n" \
- " set.u32.eq.u32 %r30,%r24,5;\n" \
- " neg.s32 %r25, %r30;\n" \
- " @%r27 bra $L3;\n" \
- " mov.u32 %r25, 1;\n" \
- "$L3:\n" \
- " mov.u32 %retval, %r25;\n" \
- " st.param.u32 [%out_retval], %retval;\n" \
- " ret;\n" \
- " }\n" \
- ".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1);\n" \
- ".visible .func (.param.u32 %out_retval)acc_on_device_h_(.param.u64 %in_ar1)\n" \
- "{\n" \
- " .reg.u64 %ar1;\n" \
- ".reg.u32 %retval;\n" \
- " .reg.u64 %hr10;\n" \
- " .reg.u64 %r25;\n" \
- " .reg.u32 %r26;\n" \
- " .reg.u32 %r27;\n" \
- " ld.param.u64 %ar1, [%in_ar1];\n" \
- " mov.u64 %r25, %ar1;\n" \
- " ld.u32 %r26, [%r25];\n" \
- " {\n" \
- " .param.u32 %retval_in;\n" \
- " {\n" \
- " .param.u32 %out_arg0;\n" \
- " st.param.u32 [%out_arg0], %r26;\n" \
- " call (%retval_in), acc_on_device, (%out_arg0);\n" \
- " }\n" \
- " ld.param.u32 %r27, [%retval_in];\n" \
- "}\n" \
- " mov.u32 %retval, %r27;\n" \
- " st.param.u32 [%out_retval], %retval;\n" \
- " ret;\n" \
- " }"
-
- #define GOACC_INTERNAL_PTX \
- ".version 3.1\n" \
- ".target sm_30\n" \
- ".address_size 64\n" \
- ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1);\n" \
- ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1);\n" \
- ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1);\n" \
- ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1);\n" \
- ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads;\n" \
- ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num;\n" \
- ".extern .func abort;\n" \
- ".visible .func (.param .u32 %out_retval) GOACC_tid (.param .u32 %in_ar1)\n" \
- "{\n" \
- ".reg .u32 %ar1;\n" \
- ".reg .u32 %retval;\n" \
- ".reg .u64 %hr10;\n" \
- ".reg .u32 %r22;\n" \
- ".reg .u32 %r23;\n" \
- ".reg .u32 %r24;\n" \
- ".reg .u32 %r25;\n" \
- ".reg .u32 %r26;\n" \
- ".reg .u32 %r27;\n" \
- ".reg .u32 %r28;\n" \
- ".reg .u32 %r29;\n" \
- ".reg .pred %r30;\n" \
- ".reg .u32 %r31;\n" \
- ".reg .pred %r32;\n" \
- ".reg .u32 %r33;\n" \
- ".reg .pred %r34;\n" \
- ".local .align 8 .b8 %frame[4];\n" \
- "ld.param.u32 %ar1,[%in_ar1];\n" \
- "mov.u32 %r27,%ar1;\n" \
- "st.local.u32 [%frame],%r27;\n" \
- "ld.local.u32 %r28,[%frame];\n" \
- "mov.u32 %r29,1;\n" \
- "setp.eq.u32 %r30,%r28,%r29;\n" \
- "@%r30 bra $L4;\n" \
- "mov.u32 %r31,2;\n" \
- "setp.eq.u32 %r32,%r28,%r31;\n" \
- "@%r32 bra $L5;\n" \
- "mov.u32 %r33,0;\n" \
- "setp.eq.u32 %r34,%r28,%r33;\n" \
- "@!%r34 bra $L8;\n" \
- "mov.u32 %r23,%tid.x;\n" \
- "mov.u32 %r22,%r23;\n" \
- "bra $L7;\n" \
- "$L4:\n" \
- "mov.u32 %r24,%tid.y;\n" \
- "mov.u32 %r22,%r24;\n" \
- "bra $L7;\n" \
- "$L5:\n" \
- "mov.u32 %r25,%tid.z;\n" \
- "mov.u32 %r22,%r25;\n" \
- "bra $L7;\n" \
- "$L8:\n" \
- "{\n" \
- "{\n" \
- "call abort;\n" \
- "}\n" \
- "}\n" \
- "$L7:\n" \
- "mov.u32 %r26,%r22;\n" \
- "mov.u32 %retval,%r26;\n" \
- "st.param.u32 [%out_retval],%retval;\n" \
- "ret;\n" \
- "}\n" \
- ".visible .func (.param .u32 %out_retval) GOACC_ntid (.param .u32 %in_ar1)\n" \
- "{\n" \
- ".reg .u32 %ar1;\n" \
- ".reg .u32 %retval;\n" \
- ".reg .u64 %hr10;\n" \
- ".reg .u32 %r22;\n" \
- ".reg .u32 %r23;\n" \
- ".reg .u32 %r24;\n" \
- ".reg .u32 %r25;\n" \
- ".reg .u32 %r26;\n" \
- ".reg .u32 %r27;\n" \
- ".reg .u32 %r28;\n" \
- ".reg .u32 %r29;\n" \
- ".reg .pred %r30;\n" \
- ".reg .u32 %r31;\n" \
- ".reg .pred %r32;\n" \
- ".reg .u32 %r33;\n" \
- ".reg .pred %r34;\n" \
- ".local .align 8 .b8 %frame[4];\n" \
- "ld.param.u32 %ar1,[%in_ar1];\n" \
- "mov.u32 %r27,%ar1;\n" \
- "st.local.u32 [%frame],%r27;\n" \
- "ld.local.u32 %r28,[%frame];\n" \
- "mov.u32 %r29,1;\n" \
- "setp.eq.u32 %r30,%r28,%r29;\n" \
- "@%r30 bra $L11;\n" \
- "mov.u32 %r31,2;\n" \
- "setp.eq.u32 %r32,%r28,%r31;\n" \
- "@%r32 bra $L12;\n" \
- "mov.u32 %r33,0;\n" \
- "setp.eq.u32 %r34,%r28,%r33;\n" \
- "@!%r34 bra $L15;\n" \
- "mov.u32 %r23,%ntid.x;\n" \
- "mov.u32 %r22,%r23;\n" \
- "bra $L14;\n" \
- "$L11:\n" \
- "mov.u32 %r24,%ntid.y;\n" \
- "mov.u32 %r22,%r24;\n" \
- "bra $L14;\n" \
- "$L12:\n" \
- "mov.u32 %r25,%ntid.z;\n" \
- "mov.u32 %r22,%r25;\n" \
- "bra $L14;\n" \
- "$L15:\n" \
- "{\n" \
- "{\n" \
- "call abort;\n" \
- "}\n" \
- "}\n" \
- "$L14:\n" \
- "mov.u32 %r26,%r22;\n" \
- "mov.u32 %retval,%r26;\n" \
- "st.param.u32 [%out_retval],%retval;\n" \
- "ret;\n" \
- "}\n" \
- ".visible .func (.param .u32 %out_retval) GOACC_ctaid (.param .u32 %in_ar1)\n" \
- "{\n" \
- ".reg .u32 %ar1;\n" \
- ".reg .u32 %retval;\n" \
- ".reg .u64 %hr10;\n" \
- ".reg .u32 %r22;\n" \
- ".reg .u32 %r23;\n" \
- ".reg .u32 %r24;\n" \
- ".reg .u32 %r25;\n" \
- ".reg .u32 %r26;\n" \
- ".reg .u32 %r27;\n" \
- ".reg .u32 %r28;\n" \
- ".reg .u32 %r29;\n" \
- ".reg .pred %r30;\n" \
- ".reg .u32 %r31;\n" \
- ".reg .pred %r32;\n" \
- ".reg .u32 %r33;\n" \
- ".reg .pred %r34;\n" \
- ".local .align 8 .b8 %frame[4];\n" \
- "ld.param.u32 %ar1,[%in_ar1];\n" \
- "mov.u32 %r27,%ar1;\n" \
- "st.local.u32 [%frame],%r27;\n" \
- "ld.local.u32 %r28,[%frame];\n" \
- "mov.u32 %r29,1;\n" \
- "setp.eq.u32 %r30,%r28,%r29;\n" \
- "@%r30 bra $L18;\n" \
- "mov.u32 %r31,2;\n" \
- "setp.eq.u32 %r32,%r28,%r31;\n" \
- "@%r32 bra $L19;\n" \
- "mov.u32 %r33,0;\n" \
- "setp.eq.u32 %r34,%r28,%r33;\n" \
- "@!%r34 bra $L22;\n" \
- "mov.u32 %r23,%ctaid.x;\n" \
- "mov.u32 %r22,%r23;\n" \
- "bra $L21;\n" \
- "$L18:\n" \
- "mov.u32 %r24,%ctaid.y;\n" \
- "mov.u32 %r22,%r24;\n" \
- "bra $L21;\n" \
- "$L19:\n" \
- "mov.u32 %r25,%ctaid.z;\n" \
- "mov.u32 %r22,%r25;\n" \
- "bra $L21;\n" \
- "$L22:\n" \
- "{\n" \
- "{\n" \
- "call abort;\n" \
- "}\n" \
- "}\n" \
- "$L21:\n" \
- "mov.u32 %r26,%r22;\n" \
- "mov.u32 %retval,%r26;\n" \
- "st.param.u32 [%out_retval],%retval;\n" \
- "ret;\n" \
- "}\n" \
- ".visible .func (.param .u32 %out_retval) GOACC_nctaid (.param .u32 %in_ar1)\n" \
- "{\n" \
- ".reg .u32 %ar1;\n" \
- ".reg .u32 %retval;\n" \
- ".reg .u64 %hr10;\n" \
- ".reg .u32 %r22;\n" \
- ".reg .u32 %r23;\n" \
- ".reg .u32 %r24;\n" \
- ".reg .u32 %r25;\n" \
- ".reg .u32 %r26;\n" \
- ".reg .u32 %r27;\n" \
- ".reg .u32 %r28;\n" \
- ".reg .u32 %r29;\n" \
- ".reg .pred %r30;\n" \
- ".reg .u32 %r31;\n" \
- ".reg .pred %r32;\n" \
- ".reg .u32 %r33;\n" \
- ".reg .pred %r34;\n" \
- ".local .align 8 .b8 %frame[4];\n" \
- "ld.param.u32 %ar1,[%in_ar1];\n" \
- "mov.u32 %r27,%ar1;\n" \
- "st.local.u32 [%frame],%r27;\n" \
- "ld.local.u32 %r28,[%frame];\n" \
- "mov.u32 %r29,1;\n" \
- "setp.eq.u32 %r30,%r28,%r29;\n" \
- "@%r30 bra $L25;\n" \
- "mov.u32 %r31,2;\n" \
- "setp.eq.u32 %r32,%r28,%r31;\n" \
- "@%r32 bra $L26;\n" \
- "mov.u32 %r33,0;\n" \
- "setp.eq.u32 %r34,%r28,%r33;\n" \
- "@!%r34 bra $L29;\n" \
- "mov.u32 %r23,%nctaid.x;\n" \
- "mov.u32 %r22,%r23;\n" \
- "bra $L28;\n" \
- "$L25:\n" \
- "mov.u32 %r24,%nctaid.y;\n" \
- "mov.u32 %r22,%r24;\n" \
- "bra $L28;\n" \
- "$L26:\n" \
- "mov.u32 %r25,%nctaid.z;\n" \
- "mov.u32 %r22,%r25;\n" \
- "bra $L28;\n" \
- "$L29:\n" \
- "{\n" \
- "{\n" \
- "call abort;\n" \
- "}\n" \
- "}\n" \
- "$L28:\n" \
- "mov.u32 %r26,%r22;\n" \
- "mov.u32 %retval,%r26;\n" \
- "st.param.u32 [%out_retval],%retval;\n" \
- "ret;\n" \
- "}\n" \
- ".visible .func (.param .u32 %out_retval) GOACC_get_num_threads\n" \
- "{\n" \
- ".reg .u32 %retval;\n" \
- ".reg .u64 %hr10;\n" \
- ".reg .u32 %r22;\n" \
- ".reg .u32 %r23;\n" \
- ".reg .u32 %r24;\n" \
- ".reg .u32 %r25;\n" \
- ".reg .u32 %r26;\n" \
- ".reg .u32 %r27;\n" \
- ".reg .u32 %r28;\n" \
- ".reg .u32 %r29;\n" \
- "mov.u32 %r26,0;\n" \
- "{\n" \
- ".param .u32 %retval_in;\n" \
- "{\n" \
- ".param .u32 %out_arg0;\n" \
- "st.param.u32 [%out_arg0],%r26;\n" \
- "call (%retval_in),GOACC_ntid,(%out_arg0);\n" \
- "}\n" \
- "ld.param.u32 %r27,[%retval_in];\n" \
- "}\n" \
- "mov.u32 %r22,%r27;\n" \
- "mov.u32 %r28,0;\n" \
- "{\n" \
- ".param .u32 %retval_in;\n" \
- "{\n" \
- ".param .u32 %out_arg0;\n" \
- "st.param.u32 [%out_arg0],%r28;\n" \
- "call (%retval_in),GOACC_nctaid,(%out_arg0);\n" \
- "}\n" \
- "ld.param.u32 %r29,[%retval_in];\n" \
- "}\n" \
- "mov.u32 %r23,%r29;\n" \
- "mul.lo.u32 %r24,%r22,%r23;\n" \
- "mov.u32 %r25,%r24;\n" \
- "mov.u32 %retval,%r25;\n" \
- "st.param.u32 [%out_retval],%retval;\n" \
- "ret;\n" \
- "}\n" \
- ".visible .func (.param .u32 %out_retval) GOACC_get_thread_num\n" \
- "{\n" \
- ".reg .u32 %retval;\n" \
- ".reg .u64 %hr10;\n" \
- ".reg .u32 %r22;\n" \
- ".reg .u32 %r23;\n" \
- ".reg .u32 %r24;\n" \
- ".reg .u32 %r25;\n" \
- ".reg .u32 %r26;\n" \
- ".reg .u32 %r27;\n" \
- ".reg .u32 %r28;\n" \
- ".reg .u32 %r29;\n" \
- ".reg .u32 %r30;\n" \
- ".reg .u32 %r31;\n" \
- ".reg .u32 %r32;\n" \
- ".reg .u32 %r33;\n" \
- "mov.u32 %r28,0;\n" \
- "{\n" \
- ".param .u32 %retval_in;\n" \
- "{\n" \
- ".param .u32 %out_arg0;\n" \
- "st.param.u32 [%out_arg0],%r28;\n" \
- "call (%retval_in),GOACC_ntid,(%out_arg0);\n" \
- "}\n" \
- "ld.param.u32 %r29,[%retval_in];\n" \
- "}\n" \
- "mov.u32 %r22,%r29;\n" \
- "mov.u32 %r30,0;\n" \
- "{\n" \
- ".param .u32 %retval_in;\n" \
- "{\n" \
- ".param .u32 %out_arg0;\n" \
- "st.param.u32 [%out_arg0],%r30;\n" \
- "call (%retval_in),GOACC_ctaid,(%out_arg0);\n" \
- "}\n" \
- "ld.param.u32 %r31,[%retval_in];\n" \
- "}\n" \
- "mov.u32 %r23,%r31;\n" \
- "mul.lo.u32 %r24,%r22,%r23;\n" \
- "mov.u32 %r32,0;\n" \
- "{\n" \
- ".param .u32 %retval_in;\n" \
- "{\n" \
- ".param .u32 %out_arg0;\n" \
- "st.param.u32 [%out_arg0],%r32;\n" \
- "call (%retval_in),GOACC_tid,(%out_arg0);\n" \
- "}\n" \
- "ld.param.u32 %r33,[%retval_in];\n" \
- "}\n" \
- "mov.u32 %r25,%r33;\n" \
- "add.u32 %r26,%r24,%r25;\n" \
- "mov.u32 %r27,%r26;\n" \
- "mov.u32 %retval,%r27;\n" \
- "st.param.u32 [%out_retval],%retval;\n" \
- "ret;\n" \
- "}\n"
#include "openacc.h"
#include "config.h"
#include "libgomp-plugin.h"
-#include "oacc-ptx.h"
#include "oacc-plugin.h"
#include "gomp-constants.h"
if (r != CUDA_SUCCESS)
GOMP_PLUGIN_fatal ("cuLinkCreate error: %s", cuda_error (r));
- char *abort_ptx = ABORT_PTX;
- r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, abort_ptx,
- strlen (abort_ptx) + 1, 0, 0, 0, 0);
- if (r != CUDA_SUCCESS)
- {
- GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
- GOMP_PLUGIN_fatal ("cuLinkAddData (abort) error: %s", cuda_error (r));
- }
-
- char *acc_on_device_ptx = ACC_ON_DEVICE_PTX;
- r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, acc_on_device_ptx,
- strlen (acc_on_device_ptx) + 1, 0, 0, 0, 0);
- if (r != CUDA_SUCCESS)
- {
- GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
- GOMP_PLUGIN_fatal ("cuLinkAddData (acc_on_device) error: %s",
- cuda_error (r));
- }
-
- char *goacc_internal_ptx = GOACC_INTERNAL_PTX;
- r = cuLinkAddData (linkstate, CU_JIT_INPUT_PTX, goacc_internal_ptx,
- strlen (goacc_internal_ptx) + 1, 0, 0, 0, 0);
- if (r != CUDA_SUCCESS)
- {
- GOMP_PLUGIN_error ("Link error log %s\n", &elog[0]);
- GOMP_PLUGIN_fatal ("cuLinkAddData (goacc_internal_ptx) error: %s",
- cuda_error (r));
- }
-
for (; num_objs--; ptx_objs++)
{
/* cuLinkAddData's 'data' argument erroneously omits the const