for (int i = 0; node->iterate_reference (i, ref); i++)
lto_output_ref (ob, ref, encoder);
}
+ if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node))
+ if (cnode->declare_variant_alt)
+ omp_lto_output_declare_variant_alt (ob, cnode, encoder);
}
streamer_write_uhwi_stream (ob->main_stream, 0);
input_ref (ib, node, nodes);
count--;
}
+ if (cgraph_node *cnode = dyn_cast <cgraph_node *> (node))
+ if (cnode->declare_variant_alt)
+ omp_lto_input_declare_variant_alt (ib, cnode, nodes);
}
}
/* As we do not recurse into BLOCK_SUBBLOCKS but only BLOCK_SUPERCONTEXT
collect block tree leafs and stream those. */
auto_vec<tree> block_tree_leafs;
- if (DECL_INITIAL (function))
+ if (DECL_INITIAL (function) && DECL_INITIAL (function) != error_mark_node)
collect_block_tree_leafs (DECL_INITIAL (function), block_tree_leafs);
streamer_write_uhwi (ob, block_tree_leafs.length ());
for (unsigned i = 0; i < block_tree_leafs.length (); ++i)
&& flag_incremental_link != INCREMENTAL_LINK_LTO)
/* Thunks have no body but they may be synthetized
at WPA time. */
- || DECL_ARGUMENTS (cnode->decl)))
+ || DECL_ARGUMENTS (cnode->decl)
+ || cnode->declare_variant_alt))
output_function (cnode);
else if ((vnode = dyn_cast <varpool_node *> (snode))
&& (DECL_INITIAL (vnode->decl) != error_mark_node
lto_symtab_encoder_t compute_ltrans_boundary (lto_symtab_encoder_t encoder);
void select_what_to_stream (void);
+/* In omp-general.c. */
+void omp_lto_output_declare_variant_alt (lto_simple_output_block *,
+ cgraph_node *, lto_symtab_encoder_t);
+void omp_lto_input_declare_variant_alt (lto_input_block *, cgraph_node *,
+ vec<symtab_node *>);
+
/* In options-save.c. */
void cl_target_option_stream_out (struct output_block *, struct bitpack_d *,
struct cl_target_option *);
case TREE_LIST:
LTO_SET_PREVAIL (TREE_VALUE (t));
LTO_SET_PREVAIL (TREE_PURPOSE (t));
- LTO_NO_PREVAIL (TREE_PURPOSE (t));
break;
default:
gcc_unreachable ();
last_visited_node++;
- gcc_assert (node->definition || node->weakref);
+ gcc_assert (node->definition || node->weakref
+ || node->declare_variant_alt);
/* Compute boundary cost of callgraph edges. */
for (edge = node->callees; edge; edge = edge->next_callee)
int index;
node = dyn_cast <cgraph_node *> (ref->referring);
- gcc_assert (node->definition);
+ gcc_assert (node->definition || node->declare_variant_alt);
index = lto_symtab_encoder_lookup (partition->encoder,
node);
if (index != LCC_NOT_FOUND
#include "tree-pass.h"
#include "omp-device-properties.h"
#include "tree-iterator.h"
+#include "data-streamer.h"
+#include "streamer-hooks.h"
enum omp_requires omp_requires_mask;
? TREE_PURPOSE (TREE_VALUE (variant1)) : base);
}
+/* Stream the "omp declare variant" alternatives recorded for NODE to
+   OB->main_stream.  NODE must have declare_variant_alt set and must have
+   an entry in the omp_declare_variant_alt hash table.  Symbol table nodes
+   are written as their indices in ENCODER.  The layout is: index of the
+   base node, number of variants, and then for each variant its node index,
+   its two scores (each as a length followed by that many HOST_WIDE_INTs)
+   and an integer identifying its context selector (described below).  */
+
+void
+omp_lto_output_declare_variant_alt (lto_simple_output_block *ob,
+                                    cgraph_node *node,
+                                    lto_symtab_encoder_t encoder)
+{
+  gcc_assert (node->declare_variant_alt);
+
+  /* Look up the entry for NODE, hashed by DECL_UID (NODE->decl).  */
+  omp_declare_variant_base_entry entry;
+  entry.base = NULL;
+  entry.node = node;
+  entry.variants = NULL;
+  omp_declare_variant_base_entry *entryp
+    = omp_declare_variant_alt->find_with_hash (&entry, DECL_UID (node->decl));
+  gcc_assert (entryp);
+
+  int nbase = lto_symtab_encoder_lookup (encoder, entryp->base);
+  gcc_assert (nbase != LCC_NOT_FOUND);
+  streamer_write_hwi_stream (ob->main_stream, nbase);
+
+  /* Use the NULL-safe length so that this agrees with the NULL-safe
+     iteration below.  */
+  streamer_write_hwi_stream (ob->main_stream,
+                             vec_safe_length (entryp->variants));
+
+  unsigned int i;
+  omp_declare_variant_entry *varentry;
+  FOR_EACH_VEC_SAFE_ELT (entryp->variants, i, varentry)
+    {
+      int nvar = lto_symtab_encoder_lookup (encoder, varentry->variant);
+      gcc_assert (nvar != LCC_NOT_FOUND);
+      streamer_write_hwi_stream (ob->main_stream, nvar);
+
+      /* Two passes: first the score, then score_in_declare_simd_clone.  */
+      for (widest_int *w = &varentry->score; ;
+           w = &varentry->score_in_declare_simd_clone)
+        {
+          unsigned len = w->get_len ();
+          streamer_write_hwi_stream (ob->main_stream, len);
+          const HOST_WIDE_INT *val = w->get_val ();
+          for (unsigned j = 0; j < len; j++)
+            streamer_write_hwi_stream (ob->main_stream, val[j]);
+          if (w == &varentry->score_in_declare_simd_clone)
+            break;
+        }
+
+      /* The context selector is not streamed itself.  Instead write twice
+         the position of the "omp declare variant base" attribute of the
+         base decl that carries it, plus one if VARENTRY->matches is set.  */
+      HOST_WIDE_INT cnt = -1;
+      HOST_WIDE_INT pos = varentry->matches ? 1 : 0;
+      for (tree attr = DECL_ATTRIBUTES (entryp->base->decl);
+           attr; attr = TREE_CHAIN (attr), pos += 2)
+        {
+          attr = lookup_attribute ("omp declare variant base", attr);
+          if (attr == NULL_TREE)
+            break;
+
+          if (varentry->ctx == TREE_VALUE (TREE_VALUE (attr)))
+            {
+              cnt = pos;
+              break;
+            }
+        }
+
+      gcc_assert (cnt != -1);
+      streamer_write_hwi_stream (ob->main_stream, cnt);
+    }
+}
+
+
+/* Read from IB the data written by omp_lto_output_declare_variant_alt for
+   NODE, which must have declare_variant_alt set.  Streamed node indices
+   are resolved through NODES.  The rebuilt entry is GC-allocated and
+   inserted into the omp_declare_variant_alt hash table (created here if it
+   does not exist yet), hashed by DECL_UID (NODE->decl).  */
+
+void
+omp_lto_input_declare_variant_alt (lto_input_block *ib, cgraph_node *node,
+                                   vec<symtab_node *> nodes)
+{
+  gcc_assert (node->declare_variant_alt);
+  omp_declare_variant_base_entry *entryp
+    = ggc_cleared_alloc<omp_declare_variant_base_entry> ();
+  entryp->base = dyn_cast<cgraph_node *> (nodes[streamer_read_hwi (ib)]);
+  /* The base decl is dereferenced below, so fail early if the streamed
+     index did not name a cgraph node.  */
+  gcc_assert (entryp->base);
+  entryp->node = node;
+  unsigned int len = streamer_read_hwi (ib);
+  vec_alloc (entryp->variants, len);
+
+  for (unsigned int i = 0; i < len; i++)
+    {
+      omp_declare_variant_entry varentry;
+      varentry.variant
+        = dyn_cast<cgraph_node *> (nodes[streamer_read_hwi (ib)]);
+      gcc_assert (varentry.variant);
+
+      /* Two passes: first the score, then score_in_declare_simd_clone.  */
+      for (widest_int *w = &varentry.score; ;
+           w = &varentry.score_in_declare_simd_clone)
+        {
+          unsigned len2 = streamer_read_hwi (ib);
+          HOST_WIDE_INT arr[WIDE_INT_MAX_ELTS];
+          gcc_assert (len2 <= WIDE_INT_MAX_ELTS);
+          for (unsigned int j = 0; j < len2; j++)
+            arr[j] = streamer_read_hwi (ib);
+          *w = widest_int::from_array (arr, len2, true);
+          if (w == &varentry.score_in_declare_simd_clone)
+            break;
+        }
+
+      /* The low bit of the streamed value is the "matches" flag.  The
+         remaining value is twice the position of the "omp declare variant
+         base" attribute on the base decl whose context selector this
+         variant uses.  */
+      HOST_WIDE_INT cnt = streamer_read_hwi (ib);
+      HOST_WIDE_INT pos = 0;
+      varentry.ctx = NULL_TREE;
+      varentry.matches = (cnt & 1) != 0;
+      cnt &= ~HOST_WIDE_INT_1;
+      for (tree attr = DECL_ATTRIBUTES (entryp->base->decl);
+           attr; attr = TREE_CHAIN (attr), pos += 2)
+        {
+          attr = lookup_attribute ("omp declare variant base", attr);
+          if (attr == NULL_TREE)
+            break;
+
+          if (cnt == pos)
+            {
+              varentry.ctx = TREE_VALUE (TREE_VALUE (attr));
+              break;
+            }
+        }
+      gcc_assert (varentry.ctx != NULL_TREE);
+      entryp->variants->quick_push (varentry);
+    }
+  if (omp_declare_variant_alt == NULL)
+    omp_declare_variant_alt
+      = hash_table<omp_declare_variant_alt_hasher>::create_ggc (64);
+  *omp_declare_variant_alt->find_slot_with_hash (entryp, DECL_UID (node->decl),
+                                                 INSERT) = entryp;
+}
/* Encode an oacc launch argument. This matches the GOMP_LAUNCH_PACK
macro on gomp-constants.h. We do not check for overflow. */
{
struct cgraph_node *node = order[i];
- if (node->definition && node->need_lto_streaming)
+ if ((node->definition || node->declare_variant_alt)
+ && node->need_lto_streaming)
{
if (gimple_has_body_p (node->decl))
lto_prepare_function_for_streaming (node);
if (DECL_ABSTRACT_P (decl))
return SYMBOL_EXTERNAL;
- if (cnode && cnode->inlined_to)
+ if (cnode && (cnode->inlined_to || cnode->declare_variant_alt))
return SYMBOL_DUPLICATE;
/* Transparent aliases are always duplicated. */
--- /dev/null
+/* { dg-do link { target vect_simd_clones } } */
+/* { dg-require-effective-target lto } */
+/* { dg-require-effective-target fpic } */
+/* { dg-require-effective-target shared } */
+/* { dg-additional-options "-fdump-tree-gimple -fdump-tree-optimized -O2 -fPIC -shared -flto -flto-partition=one" } */
+/* { dg-additional-options "-mno-sse3" { target { i?86-*-* x86_64-*-* } } } */
+
+int
+f01 (int a) /* Named as the isa("avx512f") variant of f04 below.  */
+{
+ asm volatile ("" : "+g" (a) : "g" (1) : "memory"); /* Empty asm: A is in/out, constant 1 is an input.  */
+ return a;
+}
+
+int
+f02 (int a) /* Named as the vendor(score(3):gnu), kind(cpu) variant of f04 below.  */
+{
+ asm volatile ("" : "+g" (a) : "g" (2) : "memory"); /* Empty asm: A is in/out, constant 2 is an input.  */
+ return a;
+}
+
+int
+f03 (int a) /* Named as the vendor(score(5):gnu), kind(host) variant of f04 below.  */
+{
+ asm volatile ("" : "+g" (a) : "g" (3) : "memory"); /* Empty asm: A is in/out, constant 3 is an input.  */
+ return a;
+}
+
+#pragma omp declare variant (f01) match (device={isa("avx512f")}) /* 4 or 8 */
+#pragma omp declare variant (f02) match (implementation={vendor(score(3):gnu)},device={kind(cpu)}) /* (1 or 2) + 3 */
+#pragma omp declare variant (f03) match (implementation={vendor(score(5):gnu)},device={kind(host)}) /* (1 or 2) + 5 */
+int
+f04 (int a) /* Base function; f01, f02 and f03 are declared as its variants above.  */
+{
+ asm volatile ("" : "+g" (a) : "g" (4) : "memory"); /* Empty asm: A is in/out, constant 4 is an input.  */
+ return a;
+}
+
+#pragma omp declare simd
+int
+test1 (int x) /* Calls f04 twice and returns the sum; the scan directives in the body check which function each call ends up naming.  */
+{
+ /* At gimplification time, we can't decide yet which function to call. */
+ /* { dg-final { scan-tree-dump-times "f04 \\\(x" 2 "gimple" } } */
+ /* After simd clones are created, the original non-clone test1 shall
+ call f03 (score 6), the sse2/avx/avx2 clones too, but avx512f clones
+ shall call f01 with score 8. */
+ /* { dg-final { scan-ltrans-tree-dump-not "f04 \\\(x" "optimized" } } */
+ /* { dg-final { scan-ltrans-tree-dump-times "f03 \\\(x" 14 "optimized" } } */
+ /* { dg-final { scan-ltrans-tree-dump-times "f01 \\\(x" 4 "optimized" } } */
+ int a = f04 (x);
+ int b = f04 (x);
+ return a + b;
+}