From 212513950ca0f5269b8e298a8b4c9a0982543449 Mon Sep 17 00:00:00 2001 From: Tom de Vries Date: Sat, 5 May 2018 07:56:21 +0000 Subject: [PATCH] [nvptx] Add workaround for subsequent bar.syncs 2018-05-05 Tom de Vries PR target/85653 * config/nvptx/nvptx.c (WORKAROUND_PTXJIT_BUG_3): Define. (workaround_barsyncs): New function. (nvptx_reorg): Use workaround_barsyncs. * config/nvptx/nvptx.md (define_c_enum "unspecv"): Add UNSPECV_MEMBAR. (define_expand "nvptx_membar_cta"): New define_expand. (define_insn "*nvptx_membar_cta"): New insn. From-SVN: r259967 --- gcc/ChangeLog | 10 ++++++++ gcc/config/nvptx/nvptx.c | 49 +++++++++++++++++++++++++++++++++++++++ gcc/config/nvptx/nvptx.md | 17 ++++++++++++++ 3 files changed, 76 insertions(+) diff --git a/gcc/ChangeLog b/gcc/ChangeLog index 47862a80a0d..b06b273fc91 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,13 @@ +2018-05-05 Tom de Vries + + PR target/85653 + * config/nvptx/nvptx.c (WORKAROUND_PTXJIT_BUG_3): Define. + (workaround_barsyncs): New function. + (nvptx_reorg): Use workaround_barsyncs. + * config/nvptx/nvptx.md (define_c_enum "unspecv"): Add UNSPECV_MEMBAR. + (define_expand "nvptx_membar_cta"): New define_expand. + (define_insn "*nvptx_membar_cta"): New insn. + 2018-05-04 Pekka Jääskeläinen * brig-builtins.def: Add consts to ptrs etc. in BRIG builtin defs. diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c index a0c7bc1c5cd..5608bee8a8d 100644 --- a/gcc/config/nvptx/nvptx.c +++ b/gcc/config/nvptx/nvptx.c @@ -79,6 +79,7 @@ #define WORKAROUND_PTXJIT_BUG 1 #define WORKAROUND_PTXJIT_BUG_2 1 +#define WORKAROUND_PTXJIT_BUG_3 1 /* The various PTX memory areas an object might reside in. */ enum nvptx_data_area @@ -4647,6 +4648,50 @@ prevent_branch_around_nothing (void) } #endif +#ifdef WORKAROUND_PTXJIT_BUG_3 +/* Insert two membar.cta insns inbetween two subsequent bar.sync insns. This + works around a hang observed at driver version 390.48 for sm_50. */ + +static void +workaround_barsyncs (void) +{ + bool seen_barsync = false; + for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn)) + { + if (INSN_P (insn) && recog_memoized (insn) == CODE_FOR_nvptx_barsync) + { + if (seen_barsync) + { + emit_insn_before (gen_nvptx_membar_cta (), insn); + emit_insn_before (gen_nvptx_membar_cta (), insn); + } + + seen_barsync = true; + continue; + } + + if (!seen_barsync) + continue; + + if (NOTE_P (insn) || DEBUG_INSN_P (insn)) + continue; + else if (INSN_P (insn)) + switch (recog_memoized (insn)) + { + case CODE_FOR_nvptx_fork: + case CODE_FOR_nvptx_forked: + case CODE_FOR_nvptx_joining: + case CODE_FOR_nvptx_join: + continue; + default: + break; + } + + seen_barsync = false; + } +} +#endif + /* PTX-specific reorganization - Split blocks at fork and join instructions - Compute live registers @@ -4730,6 +4775,10 @@ nvptx_reorg (void) prevent_branch_around_nothing (); #endif +#ifdef WORKAROUND_PTXJIT_BUG_3 + workaround_barsyncs (); +#endif + regstat_free_n_sets_and_refs (); df_finish_pass (true); diff --git a/gcc/config/nvptx/nvptx.md b/gcc/config/nvptx/nvptx.md index 68bba369a12..975421959e0 100644 --- a/gcc/config/nvptx/nvptx.md +++ b/gcc/config/nvptx/nvptx.md @@ -56,6 +56,7 @@ UNSPECV_XCHG UNSPECV_BARSYNC UNSPECV_MEMBAR + UNSPECV_MEMBAR_CTA UNSPECV_DIM_POS UNSPECV_FORK @@ -1481,6 +1482,22 @@ "\\tmembar.sys;" [(set_attr "predicable" "false")]) +(define_expand "nvptx_membar_cta" + [(set (match_dup 0) + (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))] + "" +{ + operands[0] = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (Pmode)); + MEM_VOLATILE_P (operands[0]) = 1; +}) + +(define_insn "*nvptx_membar_cta" + [(set (match_operand:BLK 0 "" "") + (unspec_volatile:BLK [(match_dup 0)] UNSPECV_MEMBAR_CTA))] + "" + "\\tmembar.cta;" + [(set_attr "predicable" "false")]) + (define_insn "nvptx_nounroll" [(unspec_volatile [(const_int 0)] UNSPECV_NOUNROLL)] "" -- 2.30.2