From 74249f80df4e6128da38dfb5dbf5f61285c673a2 Mon Sep 17 00:00:00 2001 From: Tony Gutierrez Date: Wed, 26 Oct 2016 22:48:45 -0400 Subject: [PATCH] hsail,gpu-compute: fixes to appease clang++ fixes to appease clang++. tested on: Ubuntu clang version 3.5.0-4ubuntu2~trusty2 (tags/RELEASE_350/final) (based on LLVM 3.5.0) Ubuntu clang version 3.6.0-2ubuntu1~trusty1 (tags/RELEASE_360/final) (based on LLVM 3.6.0) the fixes address the following five issues: 1) the exec continuations in gpu_static_inst.hh were marked as protected when they should be public. here we mark them as public. 2) the Abs instruction uses std::abs() in its execute method. because Abs is templated, it can also operate on U32 and U64 types, which causes Abs::execute() to pass uint32_t and uint64_t types to std::abs() respectively. this triggers a warning because std::abs() has no effect in this case. to remedy this we add template specializations for the execute() method of Abs when its template parameter is U32 or U64. 3) Some protocols that utilize the code in cprintf.hh were missing includes of BoolVec.hh, which defines operator<< for the BoolVec type. This would cause issues when the generated code would try to pass a BoolVec type to a method in cprintf.hh that used operator<< on an instance of a BoolVec. 4) Surprise, clang doesn't like it when you clobber all the bits in a newly allocated object. I.e., this code: tlb = new GpuTlbEntry[size]; std::memset(tlb, 0, sizeof(GpuTlbEntry) * size); Let's use std::vector to track the TLB entries in the GpuTlb now... 5) There were a few variables used only in DPRINTFs, so we mark them with M5_VAR_USED. 
--- src/arch/hsail/gen.py | 46 +++++++++++++++++++++++++++ src/arch/x86/process.cc | 9 ++++-- src/gpu-compute/gpu_static_inst.hh | 2 +- src/gpu-compute/gpu_tlb.cc | 10 ++---- src/gpu-compute/gpu_tlb.hh | 2 +- src/mem/slicc/symbols/StateMachine.py | 1 + 6 files changed, 59 insertions(+), 11 deletions(-) diff --git a/src/arch/hsail/gen.py b/src/arch/hsail/gen.py index 92fc5c510..ec6ceec3d 100755 --- a/src/arch/hsail/gen.py +++ b/src/arch/hsail/gen.py @@ -776,6 +776,52 @@ gen('Call', base_class='SpecialInstNoSrcNoDest') # Generate file epilogs # ############### +header_code(''' +template<> +inline void +Abs::execute(GPUDynInstPtr gpuDynInst) +{ + Wavefront *w = gpuDynInst->wavefront(); + + const VectorMask &mask = w->getPred(); + + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { + if (mask[lane]) { + CType dest_val; + CType src_val; + + src_val = this->src[0].template get(w, lane); + + dest_val = (CType)(src_val); + + this->dest.set(w, lane, dest_val); + } + } +} + +template<> +inline void +Abs::execute(GPUDynInstPtr gpuDynInst) +{ + Wavefront *w = gpuDynInst->wavefront(); + + const VectorMask &mask = w->getPred(); + + for (int lane = 0; lane < w->computeUnit->wfSize(); ++lane) { + if (mask[lane]) { + CType dest_val; + CType src_val; + + src_val = this->src[0].template get(w, lane); + + dest_val = (CType)(src_val); + + this->dest.set(w, lane, dest_val); + } + } +} +''') + header_code.dedent() header_code(''' } // namespace HsailISA diff --git a/src/arch/x86/process.cc b/src/arch/x86/process.cc index 66a520bc3..4b71357b0 100644 --- a/src/arch/x86/process.cc +++ b/src/arch/x86/process.cc @@ -73,7 +73,10 @@ static const int ArgumentReg[] = { INTREG_R8W, INTREG_R9W }; -static const int NumArgumentRegs = sizeof(ArgumentReg) / sizeof(const int); + +static const int NumArgumentRegs M5_VAR_USED = + sizeof(ArgumentReg) / sizeof(const int); + static const int ArgumentReg32[] = { INTREG_EBX, INTREG_ECX, @@ -82,7 +85,9 @@ static const int ArgumentReg32[] = { 
INTREG_EDI, INTREG_EBP }; -static const int NumArgumentRegs32 = sizeof(ArgumentReg) / sizeof(const int); + +static const int NumArgumentRegs32 M5_VAR_USED = + sizeof(ArgumentReg) / sizeof(const int); X86LiveProcess::X86LiveProcess(LiveProcessParams * params, ObjectFile *objFile, SyscallDesc *_syscallDescs, int _numSyscallDescs) : diff --git a/src/gpu-compute/gpu_static_inst.hh b/src/gpu-compute/gpu_static_inst.hh index e851c52e6..372eee8df 100644 --- a/src/gpu-compute/gpu_static_inst.hh +++ b/src/gpu-compute/gpu_static_inst.hh @@ -221,7 +221,6 @@ class GPUStaticInst : public GPUStaticInstFlags void setFlag(Flags flag) { _flags[flag] = true; } - protected: virtual void execLdAcq(GPUDynInstPtr gpuDynInst) { @@ -246,6 +245,7 @@ class GPUStaticInst : public GPUStaticInstFlags fatal("calling execAtomicAcq() on a non-atomic instruction.\n"); } + protected: const std::string opcode; std::string disassembly; int _instNum; diff --git a/src/gpu-compute/gpu_tlb.cc b/src/gpu-compute/gpu_tlb.cc index 2021af9a9..1f1a4cc61 100644 --- a/src/gpu-compute/gpu_tlb.cc +++ b/src/gpu-compute/gpu_tlb.cc @@ -71,16 +71,15 @@ namespace X86ISA accessDistance = p->accessDistance; clock = p->clk_domain->clockPeriod(); - tlb = new GpuTlbEntry[size]; - std::memset(tlb, 0, sizeof(GpuTlbEntry) * size); + tlb.assign(size, GpuTlbEntry()); freeList.resize(numSets); entryList.resize(numSets); for (int set = 0; set < numSets; ++set) { for (int way = 0; way < assoc; ++way) { - int x = set*assoc + way; - freeList[set].push_back(&tlb[x]); + int x = set * assoc + way; + freeList[set].push_back(&tlb.at(x)); } } @@ -133,9 +132,6 @@ namespace X86ISA { // make sure all the hash-maps are empty assert(translationReturnEvent.empty()); - - // delete the TLB - delete[] tlb; } BaseSlavePort& diff --git a/src/gpu-compute/gpu_tlb.hh b/src/gpu-compute/gpu_tlb.hh index 3549c598b..9c1a7b326 100644 --- a/src/gpu-compute/gpu_tlb.hh +++ b/src/gpu-compute/gpu_tlb.hh @@ -170,7 +170,7 @@ namespace X86ISA */ bool accessDistance; 
- GpuTlbEntry *tlb; + std::vector tlb; /* * It's a per-set list. As long as we have not reached diff --git a/src/mem/slicc/symbols/StateMachine.py b/src/mem/slicc/symbols/StateMachine.py index 6a398423f..3f88d8387 100644 --- a/src/mem/slicc/symbols/StateMachine.py +++ b/src/mem/slicc/symbols/StateMachine.py @@ -459,6 +459,7 @@ void unset_tbe(${{self.TBEType.c_ident}}*& m_tbe_ptr); #include #include "base/compiler.hh" +#include "mem/ruby/common/BoolVec.hh" #include "base/cprintf.hh" ''') -- 2.30.2