From e929d65b48ad5583e995d872ca5de95b23de4d72 Mon Sep 17 00:00:00 2001 From: Julian Brown Date: Mon, 10 Feb 2020 12:26:57 -0800 Subject: [PATCH] amdgcn: Add waitcnt after LDS write instructions Data-share write (ds_write) instructions do not necessarily complete the write to LDS immediately. When a write completes, LGKM_CNT is decremented. For now, we wait until LGKM_CNT reaches zero after each ds_write instruction. This fixes a race condition in the case where LDS is read immediately after being written. This can happen with broadcast operations. 2020-09-08 Julian Brown gcc/ * config/gcn/gcn-valu.md (scatter_insn_1offset_ds): Add waitcnt. * config/gcn/gcn.md (*mov_insn, *movti_insn): Add waitcnt to ds_write alternatives. --- gcc/config/gcn/gcn-valu.md | 2 +- gcc/config/gcn/gcn.md | 8 ++++---- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/gcc/config/gcn/gcn-valu.md b/gcc/config/gcn/gcn-valu.md index 26559ff765e..e4d7f2a0f49 100644 --- a/gcc/config/gcn/gcn-valu.md +++ b/gcc/config/gcn/gcn-valu.md @@ -923,7 +923,7 @@ { addr_space_t as = INTVAL (operands[3]); static char buf[200]; - sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s", + sprintf (buf, "ds_write%%b2\t%%0, %%2 offset:%%1%s\;s_waitcnt\tlgkmcnt(0)", (AS_GDS_P (as) ? " gds" : "")); return buf; } diff --git a/gcc/config/gcn/gcn.md b/gcc/config/gcn/gcn.md index ed98d2d2706..aeb25fbb931 100644 --- a/gcc/config/gcn/gcn.md +++ b/gcc/config/gcn/gcn.md @@ -554,7 +554,7 @@ flat_load_dword\t%0, %A1%O1%g1\;s_waitcnt\t0 flat_store_dword\t%A0, %1%O0%g0 v_mov_b32\t%0, %1 - ds_write_b32\t%A0, %1%O0 + ds_write_b32\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0) ds_read_b32\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0) s_mov_b32\t%0, %1 global_load_dword\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0) @@ -582,7 +582,7 @@ flat_load%o1\t%0, %A1%O1%g1\;s_waitcnt\t0 flat_store%s0\t%A0, %1%O0%g0 v_mov_b32\t%0, %1 - ds_write%b0\t%A0, %1%O0 + ds_write%b0\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0) ds_read%u1\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0) global_load%o1\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0) global_store%s0\t%A0, %1%O0%g0" @@ -611,7 +611,7 @@ # flat_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\t0 flat_store_dwordx2\t%A0, %1%O0%g0 - ds_write_b64\t%A0, %1%O0 + ds_write_b64\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0) ds_read_b64\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0) global_load_dwordx2\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0) global_store_dwordx2\t%A0, %1%O0%g0" @@ -667,7 +667,7 @@ # global_store_dwordx4\t%A0, %1%O0%g0 global_load_dwordx4\t%0, %A1%O1%g1\;s_waitcnt\tvmcnt(0) - ds_write_b128\t%A0, %1%O0 + ds_write_b128\t%A0, %1%O0\;s_waitcnt\tlgkmcnt(0) ds_read_b128\t%0, %A1%O1\;s_waitcnt\tlgkmcnt(0)" "reload_completed && REG_P (operands[0]) -- 2.30.2