From f6ec145fc080fa27a7427241e81668b729edc476 Mon Sep 17 00:00:00 2001 From: Kyle Roarty Date: Wed, 23 Dec 2020 19:30:37 -0600 Subject: [PATCH] gpu-compute: Fix FLAT insts decrementing lgkm count early FLAT instructions used to decrement lgkm count on execute, while the GCN3 ISA specifies that lgkm count should be decremented on data being returned or data being written. This patch changes it so that lgkm is decremented after initiateAcc (for stores) and after completeAcc (for loads) to better reflect the ISA definition. This fixes a bug where waitcnts would be satisfied even though the memory access wasn't completed, which led to instructions using the wrong data. Change-Id: I596cb031af9cda8d47a1b5e146e4a4ffd793d36c Reviewed-on: https://gem5-review.googlesource.com/c/public/gem5/+/38696 Reviewed-by: Matt Sinclair Reviewed-by: Matthew Poremba Maintainer: Matt Sinclair Tested-by: kokoro --- src/gpu-compute/global_memory_pipeline.cc | 7 +++++++ src/gpu-compute/gpu_dyn_inst.cc | 1 - 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/src/gpu-compute/global_memory_pipeline.cc b/src/gpu-compute/global_memory_pipeline.cc index bcd93f886..a2b24e4b4 100644 --- a/src/gpu-compute/global_memory_pipeline.cc +++ b/src/gpu-compute/global_memory_pipeline.cc @@ -130,6 +130,9 @@ GlobalMemPipeline::exec() DPRINTF(GPUMem, "CU%d: WF[%d][%d]: Completing global mem instr %s\n", m->cu_id, m->simdId, m->wfSlotId, m->disassemble()); m->completeAcc(m); + if (m->isFlat() && m->isLoad()) { + w->decLGKMInstsIssued(); + } w->decVMemInstsIssued(); if (m->isLoad() || m->isAtomicRet()) { @@ -193,6 +196,10 @@ GlobalMemPipeline::exec() mp->disassemble(), mp->seqNum()); mp->initiateAcc(mp); + if (mp->isFlat() && mp->isStore()) { + mp->wavefront()->decLGKMInstsIssued(); + } + if (mp->isStore() && mp->isGlobalSeg()) { mp->wavefront()->decExpInstsIssued(); } diff --git a/src/gpu-compute/gpu_dyn_inst.cc b/src/gpu-compute/gpu_dyn_inst.cc index 03ed68951..38e4ecf49 100644 --- 
a/src/gpu-compute/gpu_dyn_inst.cc +++ b/src/gpu-compute/gpu_dyn_inst.cc @@ -819,7 +819,6 @@ GPUDynInst::resolveFlatSegment(const VectorMask &mask) if (executedAs() == Enums::SC_GLOBAL) { // no transormation for global segment wavefront()->execUnitId = wavefront()->flatGmUnitId; - wavefront()->decLGKMInstsIssued(); if (isLoad()) { wavefront()->rdLmReqsInPipe--; } else if (isStore()) { -- 2.30.2