gpu-compute, hsail: make the PC a byte address, not an instruction index

author Tony Gutierrez <anthony.gutierrez@amd.com>

Thu, 27 Oct 2016 02:47:43 +0000 (22:47 -0400)

committer Tony Gutierrez <anthony.gutierrez@amd.com>

Thu, 27 Oct 2016 02:47:43 +0000 (22:47 -0400)
author Tony Gutierrez <anthony.gutierrez@amd.com>
Thu, 27 Oct 2016 02:47:43 +0000 (22:47 -0400)
committer Tony Gutierrez <anthony.gutierrez@amd.com>
Thu, 27 Oct 2016 02:47:43 +0000 (22:47 -0400)
diff --git a/src/arch/hsail/gpu_isa.hh b/src/arch/hsail/gpu_isa.hh

index dbd816d9131f3ded7cffade8872035eca95efdbd..caee776f1904ca96da5164a96f22b2c2126380eb 100644 (file)
--- a/src/arch/hsail/gpu_isa.hh
+++ b/src/arch/hsail/gpu_isa.hh
@@ -38,6 +38,7 @@
  
  #include <cstdint>
  
+#include "arch/hsail/gpu_types.hh"
  #include "base/misc.hh"
  #include "gpu-compute/misc.hh"
  
@@ -71,7 +72,7 @@ namespace HsailISA
          uint32_t
          advancePC(uint32_t old_pc, GPUDynInstPtr gpuDynInst)
          {
-            return old_pc + 1;
+            return old_pc + sizeof(RawMachInst);
          }
  
        private:
diff --git a/src/arch/hsail/gpu_types.hh b/src/arch/hsail/gpu_types.hh

index 4b3a66a9a018c7ea98559e01214cdc1fe694c30e..7b6689d672713e7a02c0630338b046ba0c6f4747 100644 (file)
--- a/src/arch/hsail/gpu_types.hh
+++ b/src/arch/hsail/gpu_types.hh
@@ -51,7 +51,7 @@ namespace HsailISA
      // our model uses to represent an actual instruction. In
      // the case of HSAIL this is just an index into a list of
      // instruction objects.
-    typedef uint64_t RawMachInst;
+    typedef uint32_t RawMachInst;
  
      // The MachInst is a representation of an instruction
      // that has more information than just the machine code.
diff --git a/src/arch/hsail/insts/branch.hh b/src/arch/hsail/insts/branch.hh

index 3a520b216f7205332be5130b9855375b5f89a23b..6df6f766a9bd0d90beb2f9d78e90201b6b9cadf7 100644 (file)
--- a/src/arch/hsail/insts/branch.hh
+++ b/src/arch/hsail/insts/branch.hh
@@ -257,7 +257,7 @@ namespace HsailISA
      {
          Wavefront *w = gpuDynInst->wavefront();
  
-        const uint32_t curr_pc = w->pc();
+        const uint32_t curr_pc M5_VAR_USED = w->pc();
          const uint32_t curr_rpc = w->rpc();
          const VectorMask curr_mask = w->execMask();
  
@@ -281,7 +281,7 @@ namespace HsailISA
          }
  
          // not taken branch
-        const uint32_t false_pc = curr_pc + 1;
+        const uint32_t false_pc = nextInstAddr();
          assert(true_pc != false_pc);
          if (false_pc != rpc && true_mask.count() < curr_mask.count()) {
              VectorMask false_mask = curr_mask & ~true_mask;
diff --git a/src/arch/hsail/insts/gpu_static_inst.hh b/src/arch/hsail/insts/gpu_static_inst.hh

index 5dcfe78d5f12dc2322938b221990c12bd165ea99..bb40411ed3c1d8daa343b15bc98a2909741e5f7f 100644 (file)
--- a/src/arch/hsail/insts/gpu_static_inst.hh
+++ b/src/arch/hsail/insts/gpu_static_inst.hh
@@ -42,6 +42,7 @@
   * Defines the base class representing HSAIL GPU static instructions.
   */
  
+#include "arch/hsail/gpu_types.hh"
  #include "gpu-compute/gpu_static_inst.hh"
  
  class BrigObject;
@@ -54,7 +55,7 @@ namespace HsailISA
        public:
          HsailGPUStaticInst(const BrigObject *obj, const std::string &opcode);
          void generateDisassembly();
-        uint32_t instSize() { return 4; }
+        int instSize() const override { return sizeof(RawMachInst); }
          bool isValid() const override { return true; }
  
        protected:
diff --git a/src/gpu-compute/cl_driver.cc b/src/gpu-compute/cl_driver.cc

index d3950ec04d82647c32de92cad860af1b80c4ed03..d6d1b1334f5821579b66616f0f1c63e91da3f5f6 100644 (file)
--- a/src/gpu-compute/cl_driver.cc
+++ b/src/gpu-compute/cl_driver.cc
@@ -79,7 +79,7 @@ ClDriver::ClDriver(ClDriverParams *p)
          kernelInfo[i].code_offs = code_offs;
  
          name_offs += k->name().size() + 1;
-        code_offs += k->numInsts() * sizeof(GPUStaticInst*);
+        code_offs += k->numInsts() * sizeof(TheGpuISA::RawMachInst);
      }
  }
  
@@ -130,7 +130,8 @@ ClDriver::ioctl(LiveProcess *process, ThreadContext *tc, unsigned req)
                  HsaCode *k = kernels[i];
                  // add one for terminating '\0'
                  sizes->string_table_size += k->name().size() + 1;
-                sizes->code_size += k->numInsts() * sizeof(GPUStaticInst*);
+                sizes->code_size +=
+                    k->numInsts() * sizeof(TheGpuISA::RawMachInst);
              }
  
              sizes.copyOut(tc->getMemProxy());
diff --git a/src/gpu-compute/fetch_unit.cc b/src/gpu-compute/fetch_unit.cc

index 2ea7f1f9d3a4517358af0e5f16a76c915fc69dee..1b19a3223511200e4453f6ce9e5681d4c7672c36 100644 (file)
--- a/src/gpu-compute/fetch_unit.cc
+++ b/src/gpu-compute/fetch_unit.cc
@@ -122,11 +122,10 @@ FetchUnit::initiateFetch(Wavefront *wavefront)
       * instructions on a 32b granularity so we must account for that here.
      */
      for (int i = 0; i < wavefront->instructionBuffer.size(); ++i) {
-        int current_inst_size =
+        vaddr +=
              wavefront->instructionBuffer.at(i)->staticInstruction()->instSize();
-        vaddr += current_inst_size / sizeof(uint32_t);
      }
-    vaddr = wavefront->basePtr +  vaddr * sizeof(GPUStaticInst*);
+    vaddr = wavefront->basePtr +  vaddr;
  
      DPRINTF(GPUTLB, "CU%d: WF[%d][%d]: Initiating fetch translation: %#x\n",
              computeUnit->cu_id, wavefront->simdId, wavefront->wfSlotId, vaddr);
diff --git a/src/gpu-compute/gpu_static_inst.cc b/src/gpu-compute/gpu_static_inst.cc

index 0f74bd5323196b83851583c4fd76fca9900c33c1..c375bf248ae58b10dc205932764287012bbb16a1 100644 (file)
--- a/src/gpu-compute/gpu_static_inst.cc
+++ b/src/gpu-compute/gpu_static_inst.cc
@@ -37,7 +37,7 @@
  
  GPUStaticInst::GPUStaticInst(const std::string &opcode)
      : executed_as(Enums::SC_NONE), opcode(opcode),
-      _instNum(0)
+      _instNum(0), _instAddr(0)
  {
      setFlag(NoOrder);
  }
diff --git a/src/gpu-compute/gpu_static_inst.hh b/src/gpu-compute/gpu_static_inst.hh

index a73ec12e39b1af5d2e92b971464d0ac0ed00f182..2fa1e0ca5797ef44832749dee07a4ad2127e6336 100644 (file)
--- a/src/gpu-compute/gpu_static_inst.hh
+++ b/src/gpu-compute/gpu_static_inst.hh
@@ -61,6 +61,9 @@ class GPUStaticInst : public GPUStaticInstFlags
  {
    public:
      GPUStaticInst(const std::string &opcode);
+    void instAddr(int inst_addr) { _instAddr = inst_addr; }
+    int instAddr() const { return _instAddr; }
+    int nextInstAddr() const { return _instAddr + instSize(); }
  
      void instNum(int num) { _instNum = num; }
  
@@ -190,7 +193,7 @@ class GPUStaticInst : public GPUStaticInstFlags
      bool isGloballyCoherent() const { return _flags[GloballyCoherent]; }
      bool isSystemCoherent() const { return _flags[SystemCoherent]; }
  
-    virtual uint32_t instSize() = 0;
+    virtual int instSize() const = 0;
  
      // only used for memory instructions
      virtual void
@@ -243,6 +246,7 @@ class GPUStaticInst : public GPUStaticInstFlags
      const std::string opcode;
      std::string disassembly;
      int _instNum;
+    int _instAddr;
      /**
       * Identifier of the immediate post-dominator instruction.
       */
@@ -286,7 +290,7 @@ class KernelLaunchStaticInst : public GPUStaticInst
      int numDstRegOperands() { return 0; }
      int numSrcRegOperands() { return 0; }
      bool isValid() const { return true; }
-    uint32_t instSize() { return 0; }
+    int instSize() const override { return 0; }
  };
  
  #endif // __GPU_STATIC_INST_HH__
diff --git a/src/gpu-compute/hsail_code.cc b/src/gpu-compute/hsail_code.cc

index b0ddf01611b60b02759062c4c3b26f22ef64a2bf..59faa67e91e59b576fd2e7106a4914e939c6315e 100644 (file)
--- a/src/gpu-compute/hsail_code.cc
+++ b/src/gpu-compute/hsail_code.cc
@@ -84,6 +84,11 @@ HsailCode::init(const BrigDirectiveExecutable *code_dir, const BrigObject *obj,
      const BrigBase *endPtr =
          obj->getCodeSectionEntry(code_dir->nextModuleEntry);
  
+    // the instruction's byte address (relative to the base addr
+    // of the code section)
+    int inst_addr = 0;
+    // the index that points to the instruction in the instruction
+    // array
      int inst_idx = 0;
      std::vector<GPUStaticInst*> instructions;
      int funcarg_size_scope = 0;
@@ -121,7 +126,7 @@ HsailCode::init(const BrigDirectiveExecutable *code_dir, const BrigObject *obj,
                          "kind_label, label is: %s \n",
                          obj->getString(lbl->name));
  
-                labelMap.addLabel(lbl, inst_idx, obj);
+                labelMap.addLabel(lbl, inst_addr, obj);
              }
              break;
  
@@ -175,14 +180,16 @@ HsailCode::init(const BrigDirectiveExecutable *code_dir, const BrigObject *obj,
  
                  if (iptr) {
                      DPRINTF(HSAILObject, "Initializing code, processing inst "
-                            "#%d idx %d: OPCODE=%d\n",
-                            inst_idx,  _insts.size(), instPtr->opcode);
+                            "byte addr #%d idx %d: OPCODE=%d\n", inst_addr,
+                            inst_idx, instPtr->opcode);
  
-                    TheGpuISA::RawMachInst inst_num = decoder.saveInst(iptr);
+                    TheGpuISA::RawMachInst raw_inst = decoder.saveInst(iptr);
                      iptr->instNum(inst_idx);
-                    _insts.push_back(inst_num);
+                    iptr->instAddr(inst_addr);
+                    _insts.push_back(raw_inst);
                      instructions.push_back(iptr);
                  }
+                inst_addr += sizeof(TheGpuISA::RawMachInst);
                  ++inst_idx;
              } else if (entryPtr->kind >= BRIG_KIND_OPERAND_BEGIN &&
                         entryPtr->kind < BRIG_KIND_OPERAND_END) {
diff --git a/src/gpu-compute/kernel_cfg.cc b/src/gpu-compute/kernel_cfg.cc

index ac6a81b1692efdcc71c4efcea971e25dd85421b8..de518ec8430b9dfc5707b5c9746bf4bfcd418150 100644 (file)
--- a/src/gpu-compute/kernel_cfg.cc
+++ b/src/gpu-compute/kernel_cfg.cc
@@ -63,11 +63,11 @@ ControlFlowInfo::ControlFlowInfo(const std::vector<GPUStaticInst*>& insts) :
  }
  
  BasicBlock*
-ControlFlowInfo::basicBlock(int inst_num) const {
+ControlFlowInfo::basicBlock(int inst_addr) const {
      for (auto& block: basicBlocks) {
-       int first_block_id = block->firstInstruction->instNum();
-       if (inst_num >= first_block_id &&
-               inst_num < first_block_id + block->size) {
+       int first_block_addr = block->firstInstruction->instAddr();
+       if (inst_addr >= first_block_addr && inst_addr <
+           first_block_addr + block->size * sizeof(TheGpuISA::RawMachInst)) {
             return block.get();
         }
      }
@@ -102,24 +102,23 @@ ControlFlowInfo::createBasicBlocks()
      std::set<int> leaders;
      // first instruction is a leader
      leaders.insert(0);
-    for (int i = 1; i < instructions.size(); i++) {
-        GPUStaticInst* instruction = instructions[i];
+    for (const auto &instruction : instructions) {
          if (instruction->isBranch()) {
              const int target_pc = instruction->getTargetPc();
              leaders.insert(target_pc);
-            leaders.insert(i + 1);
+            leaders.insert(instruction->nextInstAddr());
          }
      }
  
      size_t block_size = 0;
-    for (int i = 0; i < instructions.size(); i++) {
-        if (leaders.find(i) != leaders.end()) {
+    for (const auto &instruction : instructions) {
+        if (leaders.find(instruction->instAddr()) != leaders.end()) {
              uint32_t id = basicBlocks.size();
              if (id > 0) {
                  basicBlocks.back()->size = block_size;
              }
              block_size = 0;
-            basicBlocks.emplace_back(new BasicBlock(id, instructions[i]));
+            basicBlocks.emplace_back(new BasicBlock(id, instruction));
          }
          block_size++;
      }
@@ -149,7 +148,7 @@ ControlFlowInfo::connectBasicBlocks()
  
          // Unconditional jump instructions have a unique successor
          if (!last->isUnconditionalJump()) {
-            BasicBlock* next_bb = basicBlock(last->instNum() + 1);
+            BasicBlock* next_bb = basicBlock(last->nextInstAddr());
              bb->successorIds.insert(next_bb->id);
          }
      }
@@ -236,9 +235,9 @@ ControlFlowInfo::findImmediatePostDominators()
          BasicBlock* ipd_block = basicBlocks[*(candidates.begin())].get();
          if (!ipd_block->isExit()) {
              GPUStaticInst* ipd_first_inst = ipd_block->firstInstruction;
-            last_instruction->ipdInstNum(ipd_first_inst->instNum());
+            last_instruction->ipdInstNum(ipd_first_inst->instAddr());
          } else {
-            last_instruction->ipdInstNum(last_instruction->instNum() + 1);
+            last_instruction->ipdInstNum(last_instruction->nextInstAddr());
          }
      }
  }
@@ -271,8 +270,8 @@ void
  ControlFlowInfo::printBasicBlocks() const
  {
      for (GPUStaticInst* inst : instructions) {
-        int inst_num = inst->instNum();
-        std::cout << inst_num << " [" << basicBlock(inst_num)->id
+        int inst_addr = inst->instAddr();
+        std::cout << inst_addr << " [" << basicBlock(inst_addr)->id
                  << "]: " << inst->disassemble();
          if (inst->isBranch()) {
              std::cout << ", PC = " << inst->getTargetPc();
diff --git a/src/gpu-compute/kernel_cfg.hh b/src/gpu-compute/kernel_cfg.hh

index 74ea861d8fddf8f668eebd867bfa6947d0d50b20..d4959c8579b17013e62228239d66132b136d14a0 100644 (file)
--- a/src/gpu-compute/kernel_cfg.hh
+++ b/src/gpu-compute/kernel_cfg.hh
@@ -106,7 +106,7 @@ private:
  
      GPUStaticInst* lastInstruction(const BasicBlock* block) const;
  
-    BasicBlock* basicBlock(int inst_num) const;
+    BasicBlock* basicBlock(int inst_addr) const;
  
      BasicBlock* postDominator(const BasicBlock* block) const;
author	Tony Gutierrez <anthony.gutierrez@amd.com>
	Thu, 27 Oct 2016 02:47:43 +0000 (22:47 -0400)
committer	Tony Gutierrez <anthony.gutierrez@amd.com>
	Thu, 27 Oct 2016 02:47:43 +0000 (22:47 -0400)
src/arch/hsail/gpu_isa.hh		patch \| blob \| history
src/arch/hsail/gpu_types.hh		patch \| blob \| history
src/arch/hsail/insts/branch.hh		patch \| blob \| history
src/arch/hsail/insts/gpu_static_inst.hh		patch \| blob \| history
src/gpu-compute/cl_driver.cc		patch \| blob \| history
src/gpu-compute/fetch_unit.cc		patch \| blob \| history
src/gpu-compute/gpu_static_inst.cc		patch \| blob \| history
src/gpu-compute/gpu_static_inst.hh		patch \| blob \| history
src/gpu-compute/hsail_code.cc		patch \| blob \| history
src/gpu-compute/kernel_cfg.cc		patch \| blob \| history
src/gpu-compute/kernel_cfg.hh		patch \| blob \| history