src/gpu-compute/gpu_command_processor.hh

   1 /*
   2  * Copyright (c) 2018 Advanced Micro Devices, Inc.
   3  * All rights reserved.
   4  *
   5  * For use for simulation and test purposes only
   6  *
   7  * Redistribution and use in source and binary forms, with or without
   8  * modification, are permitted provided that the following conditions are met:
   9  *
  10  * 1. Redistributions of source code must retain the above copyright notice,
  11  * this list of conditions and the following disclaimer.
  12  *
  13  * 2. Redistributions in binary form must reproduce the above copyright notice,
  14  * this list of conditions and the following disclaimer in the documentation
  15  * and/or other materials provided with the distribution.
  16  *
  17  * 3. Neither the name of the copyright holder nor the names of its
  18  * contributors may be used to endorse or promote products derived from this
  19  * software without specific prior written permission.
  20  *
  21  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
  22  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
  23  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
  24  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
  25  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
  26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
  27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
  28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
  29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
  30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
  31  * POSSIBILITY OF SUCH DAMAGE.
  32  *
  33  * Authors: Anthony Gutierrez
  34  */
  35
  36 /**
  37  * @file
  38  * The GPUCommandProcessor (CP) is responsible for accepting commands, in
  39  * the form of HSA AQL packets, from the HSA packet processor (HSAPP). The CP
  40  * works with several components, including the HSAPP and the dispatcher.
  41  * When the HSAPP sends a ready task to the CP, the CP performs the necessary
  42  * operations to extract relevant data structures from memory, such as the
  43  * AQL queue descriptor and AQL packet, and initializes register state for the
  44  * task's wavefronts.
  45  */
  46
  47 #ifndef __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
  48 #define __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__
  49
  50 #include "dev/hsa/hsa_device.hh"
  51 #include "gpu-compute/hsa_queue_entry.hh"
  52
  53 struct GPUCommandProcessorParams;
  54 class GPUDispatcher;
  55 class Shader;
  56
  57 class GPUCommandProcessor : public HSADevice
  58 {
  59   public:
  60     typedef GPUCommandProcessorParams Params;
  61
  62     GPUCommandProcessor() = delete;
  63     GPUCommandProcessor(const Params &p);
  64
  65     void setShader(Shader *shader);
  66     Shader* shader();
  67
  68     void submitDispatchPkt(void *raw_pkt, uint32_t queue_id,
  69                            Addr host_pkt_addr) override;
  70     void submitVendorPkt(void *raw_pkt, uint32_t queue_id,
  71                          Addr host_pkt_addr) override;
  72     void dispatchPkt(HSAQueueEntry *task);
  73
  74     Tick write(PacketPtr pkt) override { return 0; }
  75     Tick read(PacketPtr pkt) override { return 0; }
  76     AddrRangeList getAddrRanges() const override;
  77     System *system();
  78
  79   private:
  80     Shader *_shader;
  81     GPUDispatcher &dispatcher;
  82
  83     void initABI(HSAQueueEntry *task);
  84
  85     /**
  86      * Perform a DMA read of the read_dispatch_id_field_base_byte_offset
  87      * field, which follows directly after the read_dispatch_id (the read
  88      * pointer) in the amd_hsa_queue_t struct (aka memory queue descriptor
  89      * (MQD)), to find the base address of the MQD. The MQD is the runtime's
  90      * soft representation of a HW queue descriptor (HQD).
  91      *
  92      * Any fields below the read dispatch ID in the amd_hsa_queue_t should
  93      * not change according to the HSA standard, therefore we should be able
  94      * to get them based on their known relative position to the read dispatch
  95      * ID.
  96      */
  97     class ReadDispIdOffsetDmaEvent : public DmaCallback
  98     {
  99       public:
 100         ReadDispIdOffsetDmaEvent(GPUCommandProcessor &gpu_cmd_proc,
 101                                  HSAQueueEntry *task)
 102             : DmaCallback(), readDispIdOffset(0), gpuCmdProc(gpu_cmd_proc),
 103               _task(task)
 104         {
 105         }
 106
 107         void
 108         process() override
 109         {
 110             /**
 111              * Now that the read pointer's offset from the base of
 112              * the MQD is known, we can use that to calculate the
 113              * the address of the MQD itself, the dispatcher will
 114              * DMA that into the HSAQueueEntry when a kernel is
 115              * launched.
 116              */
 117             _task->hostAMDQueueAddr
 118                 = gpuCmdProc.hsaPP->getQueueDesc(_task->queueId())
 119                     ->hostReadIndexPtr - readDispIdOffset;
 120
 121             /**
 122              * DMA a copy of the MQD into the task. Some fields of
 123              * the MQD will be used to initialize register state.
 124              */
 125             auto *mqdDmaEvent = new MQDDmaEvent(gpuCmdProc, _task);
 126             gpuCmdProc.dmaReadVirt(_task->hostAMDQueueAddr,
 127                                    sizeof(_amd_queue_t), mqdDmaEvent,
 128                                    &_task->amdQueue);
 129         }
 130
 131         uint32_t readDispIdOffset;
 132
 133       private:
 134         GPUCommandProcessor &gpuCmdProc;
 135         HSAQueueEntry *_task;
 136     };
 137
 138     /**
 139      * Perform a DMA read of the MQD that corresponds to a hardware
 140      * queue descriptor (HQD). We store a copy of the MQD in the
 141      * HSAQueueEntry object so we can send a copy of it along with
 142      * a dispatch packet, which is needed to initialize register
 143      * state.
 144      */
 145     class MQDDmaEvent : public DmaCallback
 146     {
 147       public:
 148         MQDDmaEvent(GPUCommandProcessor &gpu_cmd_proc, HSAQueueEntry *task)
 149             : DmaCallback(), gpuCmdProc(gpu_cmd_proc), _task(task)
 150         {
 151         }
 152
 153         void
 154         process() override
 155         {
 156             gpuCmdProc.dispatchPkt(_task);
 157         }
 158
 159       private:
 160         GPUCommandProcessor &gpuCmdProc;
 161         HSAQueueEntry *_task;
 162     };
 163 };
 164
 165 #endif // __DEV_HSA_GPU_COMMAND_PROCESSOR_HH__