/*
 * Copyright (c) 2018 Advanced Micro Devices, Inc.
 * All rights reserved.
 *
 * For use for simulation and test purposes only
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * 1. Redistributions of source code must retain the above copyright notice,
 * this list of conditions and the following disclaimer.
 *
 * 2. Redistributions in binary form must reproduce the above copyright notice,
 * this list of conditions and the following disclaimer in the documentation
 * and/or other materials provided with the distribution.
 *
 * 3. Neither the name of the copyright holder nor the names of its
 * contributors may be used to endorse or promote products derived from this
 * software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * Authors: Matt Sinclair
 */

#ifndef __ARCH_GCN3_GPU_MEM_HELPERS_HH__
#define __ARCH_GCN3_GPU_MEM_HELPERS_HH__

#include "arch/gcn3/insts/gpu_static_inst.hh"
#include "arch/gcn3/insts/op_encodings.hh"
#include "debug/GPUMem.hh"
#include "gpu-compute/gpu_dyn_inst.hh"

/**
 * Helper function for vector memory instructions declared in op_encodings.
 * It takes the arguments needed to build a memory request for each active
 * lane, then issues one request per lane, or two requests if that lane's
 * access is misaligned (i.e., straddles a cache line boundary). See the
 * usage sketch after the function body.
 */
template<typename T, int N>
inline void
initMemReqHelper(GPUDynInstPtr gpuDynInst, MemCmd mem_req_type,
                 bool is_atomic=false)
{
    // local variables
    int req_size = N * sizeof(T);
    int block_size = gpuDynInst->computeUnit()->cacheLineSize();
    Addr vaddr = 0, split_addr = 0;
    bool misaligned_acc = false;
    RequestPtr req = nullptr, req1 = nullptr, req2 = nullptr;
    PacketPtr pkt = nullptr, pkt1 = nullptr, pkt2 = nullptr;

    gpuDynInst->resetEntireStatusVector();
    for (int lane = 0; lane < Gcn3ISA::NumVecElemPerVecReg; ++lane) {
        if (gpuDynInst->exec_mask[lane]) {
            vaddr = gpuDynInst->addr[lane];

            /**
             * the base address of the cache line that holds the last
             * byte of the request.
             */
            split_addr = roundDown(vaddr + req_size - 1, block_size);

            assert(split_addr <= vaddr || split_addr - vaddr < block_size);
            /**
             * if the base cache line address of the last byte is
             * greater than the address of the first byte then we have
             * a misaligned access.
             */
            misaligned_acc = split_addr > vaddr;
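            // e.g., with a 64-byte cache line, a 4-byte access at vaddr 60
            // ends at byte 63 (roundDown(63, 64) == 0 <= vaddr: same line),
            // while the same access at vaddr 62 ends at byte 65
            // (roundDown(65, 64) == 64 > vaddr: split across two lines)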

            if (is_atomic) {
                // make sure request is word aligned
                assert((vaddr & 0x3) == 0);

                // a given lane's atomic can't cross cache lines
                assert(!misaligned_acc);

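                // the functor applies this lane's atomic operands, staged
                // in a_data and x_data (e.g., the source and compare values
                // of a compare-and-swap), to the target memory location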
                req = std::make_shared<Request>(vaddr, sizeof(T), 0,
                    gpuDynInst->computeUnit()->masterId(), 0,
                    gpuDynInst->wfDynId,
                    gpuDynInst->makeAtomicOpFunctor<T>(
                        &(reinterpret_cast<T*>(gpuDynInst->a_data))[lane],
                        &(reinterpret_cast<T*>(gpuDynInst->x_data))[lane]));
            } else {
                req = std::make_shared<Request>(vaddr, req_size, 0,
                    gpuDynInst->computeUnit()->masterId(), 0,
                    gpuDynInst->wfDynId);
            }

            if (misaligned_acc) {
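                // the status vector tracks how many memory packets each
                // lane is still waiting on: two here, since the access is
                // split across two cache lines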
                gpuDynInst->setStatusVector(lane, 2);
                req->splitOnVaddr(split_addr, req1, req2);
                gpuDynInst->setRequestFlags(req1);
                gpuDynInst->setRequestFlags(req2);
                pkt1 = new Packet(req1, mem_req_type);
                pkt2 = new Packet(req2, mem_req_type);
                pkt1->dataStatic(&(reinterpret_cast<T*>(
                    gpuDynInst->d_data))[lane * N]);
                pkt2->dataStatic(&(reinterpret_cast<T*>(
                    gpuDynInst->d_data))[lane * N + req1->getSize()]);
                DPRINTF(GPUMem, "CU%d: WF[%d][%d]: index: %d unaligned memory "
                        "request for %#x\n", gpuDynInst->cu_id,
                        gpuDynInst->simdId, gpuDynInst->wfSlotId, lane,
                        split_addr);
                gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt1);
                gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt2);
            } else {
                gpuDynInst->setStatusVector(lane, 1);
                gpuDynInst->setRequestFlags(req);
                pkt = new Packet(req, mem_req_type);
                pkt->dataStatic(&(reinterpret_cast<T*>(
                    gpuDynInst->d_data))[lane * N]);
                gpuDynInst->computeUnit()->sendRequest(gpuDynInst, lane, pkt);
            }
        } else { // if lane is not active, then no pending requests
            gpuDynInst->setStatusVector(lane, 0);
        }
    }
}
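
/**
 * Usage sketch (illustrative, not part of the original header): a FLAT
 * vector load of one 32-bit element per lane could initiate its access
 * from its initiateAcc() hook roughly as follows; in gem5 the call
 * typically goes through a thin wrapper in op_encodings rather than being
 * written out like this.
 *
 *     void
 *     Inst_FLAT__FLAT_LOAD_DWORD::initiateAcc(GPUDynInstPtr gpuDynInst)
 *     {
 *         // one ReadReq per active lane, or two if that lane's address
 *         // straddles a cache line boundary
 *         initMemReqHelper<VecElemU32, 1>(gpuDynInst, MemCmd::ReadReq);
 *     }
 *
 * Atomics instead pass is_atomic=true and an atomic command (e.g.,
 * MemCmd::SwapReq) after staging their per-lane operands in a_data/x_data.
 */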

/**
 * Helper function for scalar memory instructions declared in op_encodings.
 * It takes the arguments needed to build the memory request, then issues a
 * single request, or two split requests if the access straddles a cache
 * line boundary. See the usage sketch after the function body.
 */
template<typename T, int N>
inline void
initMemReqScalarHelper(GPUDynInstPtr gpuDynInst, MemCmd mem_req_type)
{
    int req_size = N * sizeof(T);
    int block_size = gpuDynInst->computeUnit()->cacheLineSize();
    Addr vaddr = gpuDynInst->scalarAddr;

    /**
     * the base address of the cache line that holds the last byte of
     * the request.
     */
    Addr split_addr = roundDown(vaddr + req_size - 1, block_size);

    assert(split_addr <= vaddr || split_addr - vaddr < block_size);
    /**
     * if the base cache line address of the last byte is greater
     * than the address of the first byte then we have a misaligned
     * access.
     */
    bool misaligned_acc = split_addr > vaddr;

    RequestPtr req = std::make_shared<Request>(vaddr, req_size, 0,
        gpuDynInst->computeUnit()->masterId(), 0,
        gpuDynInst->wfDynId);

    if (misaligned_acc) {
        RequestPtr req1, req2;
        req->splitOnVaddr(split_addr, req1, req2);
        gpuDynInst->numScalarReqs = 2;
        gpuDynInst->setRequestFlags(req1);
        gpuDynInst->setRequestFlags(req2);
        PacketPtr pkt1 = new Packet(req1, mem_req_type);
        PacketPtr pkt2 = new Packet(req2, mem_req_type);
        pkt1->dataStatic(gpuDynInst->scalar_data);
        pkt2->dataStatic(gpuDynInst->scalar_data + req1->getSize());
        DPRINTF(GPUMem, "CU%d: WF[%d][%d]: unaligned scalar memory request for"
                " %#x\n", gpuDynInst->cu_id, gpuDynInst->simdId,
                gpuDynInst->wfSlotId, split_addr);
        gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt1);
        gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt2);
    } else {
        gpuDynInst->numScalarReqs = 1;
        gpuDynInst->setRequestFlags(req);
        PacketPtr pkt = new Packet(req, mem_req_type);
        pkt->dataStatic(gpuDynInst->scalar_data);
        gpuDynInst->computeUnit()->sendScalarRequest(gpuDynInst, pkt);
    }
}
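
/**
 * Usage sketch (illustrative): a scalar load of two consecutive dwords
 * starting at scalarAddr, e.g. S_LOAD_DWORDX2, could initiate its access
 * roughly as follows; the exact gem5 call site may sit behind a wrapper
 * in op_encodings.
 *
 *     void
 *     Inst_SMEM__S_LOAD_DWORDX2::initiateAcc(GPUDynInstPtr gpuDynInst)
 *     {
 *         // a single ReadReq, split in two only if the eight bytes
 *         // straddle a cache line boundary
 *         initMemReqScalarHelper<ScalarRegU32, 2>(gpuDynInst,
 *                                                 MemCmd::ReadReq);
 *     }
 */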

#endif // __ARCH_GCN3_GPU_MEM_HELPERS_HH__